diff --git a/MNN.podspec b/MNN.podspec index 670c3774..eb54ed4f 100644 --- a/MNN.podspec +++ b/MNN.podspec @@ -74,6 +74,6 @@ Pod::Spec.new do |s| end s.compiler_flags = '-arch arm64 -march=armv8.2-a+simd+fp16' - s.pod_target_xcconfig = {'METAL_LIBRARY_FILE_BASE' => 'mnn', 'HEADER_SEARCH_PATHS' => '"$(PODS_TARGET_SRCROOT)/include" "$(PODS_TARGET_SRCROOT)/3rd_party/flatbuffers/include" "$(PODS_TARGET_SRCROOT)/source" "$(PODS_TARGET_SRCROOT)/3rd_party/half" "$(PODS_TARGET_SRCROOT)/source/backend/coreml/mlmodel/include" "$(PODS_TARGET_SRCROOT)/tools/cv/include"', 'GCC_PREPROCESSOR_DEFINITIONS' => '$(inherited) MNN_CODEGEN_REGISTER=1 MNN_SUPPORT_TFLITE_QUAN=1 MNN_METAL_ENABLED=1 MNN_SUPPORT_BF16=1 MNN_COREML_ENABLED=1 USE_LZ4_FLAG=1 MNN_INTERNAL_ENABLED=1'} + s.pod_target_xcconfig = {'METAL_LIBRARY_FILE_BASE' => 'mnn', 'HEADER_SEARCH_PATHS' => '"$(PODS_TARGET_SRCROOT)/include" "$(PODS_TARGET_SRCROOT)/3rd_party/flatbuffers/include" "$(PODS_TARGET_SRCROOT)/source" "$(PODS_TARGET_SRCROOT)/3rd_party/half" "$(PODS_TARGET_SRCROOT)/source/backend/coreml/mlmodel/include" "$(PODS_TARGET_SRCROOT)/tools/cv/include"', 'GCC_PREPROCESSOR_DEFINITIONS' => '$(inherited) MNN_CODEGEN_REGISTER=1 MNN_SUPPORT_TFLITE_QUAN=1 MNN_METAL_ENABLED=1 MNN_SUPPORT_BF16=1 MNN_COREML_ENABLED=1 USE_LZ4_FLAG=1 MNN_INTERNAL_ENABLED=1 MNN_USE_SPARSE_COMPUTE=1'} s.user_target_xcconfig = { 'OTHER_LDFLAGS' => '-force_load $(BUILD_DIR)/$(CONFIGURATION)$(EFFECTIVE_PLATFORM_NAME)/MNN/libMNN.a', 'HEADER_SEARCH_PATHS' => '"$(PODS_TARGET_SRCROOT)/include"' } end diff --git a/docs/tools/benchmark.md b/docs/tools/benchmark.md index 717219b7..58a24b88 100644 --- a/docs/tools/benchmark.md +++ b/docs/tools/benchmark.md @@ -2,13 +2,17 @@ ## Linux / macOS / Ubuntu [从源码编译](../compile/tools.html#benchmark),然后执行如下命令: ```bash -./benchmark.out models_folder loop_count warm_up_count forwardtype +./benchmark.out models_folder loop_count warm_up_count forwardtype numberThread precision weightSparsity weightSparseBlockNumber ``` 参数如下: - models_folder: benchmark models文件夹,[benchmark models](https://github.com/alibaba/MNN/tree/master/benchmark/models)。 - loop_count: 可选,默认是10 - warm_up_count: 预热次数 - forwardtype: 可选,默认是0,即CPU,forwardtype有0->CPU,1->Metal,3->OpenCL,6->OpenGL,7->Vulkan +- numberThread: 可选,默认是4,为 CPU 线程数或者 GPU 的运行模式 +- precision: 可选,默认是 2 (precision_low) +- weightSparsity: 可选,默认是 0.0 ,在 weightSparsity > 0.5 时且后端支持时,开启稀疏计算 +- weightSparseBlockNumber: 可选,默认是 1 ,仅当 weightSparsity > 0.5 时生效,为稀疏计算 block 大小,越大越有利于稀疏计算的加速,一般选择 1, 4, 8, 16 ## Android 在[benchmark目录](https://github.com/alibaba/MNN/tree/master/benchmark/android)下直接执行脚本`bench_android.sh`,默认编译armv7,加参数-64编译armv8,参数-p将[benchmarkModels](https://github.com/alibaba/MNN/tree/master/benchmark/models) push到机器上。 脚本执行完成在[benchmark目录](https://github.com/alibaba/MNN/tree/master/benchmark/android)下得到测试结果`benchmark.txt` diff --git a/express/Executor.cpp b/express/Executor.cpp index 1d33453b..e44607b2 100644 --- a/express/Executor.cpp +++ b/express/Executor.cpp @@ -107,6 +107,7 @@ void Executor::setGlobalExecutorConfig(MNNForwardType type, const BackendConfig& std::shared_ptr bn(creator->onCreate(info)); mRuntimes[mAttr->firstType] = bn; } + _refreshRuntime(); } int Executor::getCurrentRuntimeStatus(RuntimeStatus statusEnum) { @@ -139,6 +140,7 @@ Executor::Executor(std::shared_ptr backend, MNNForwardType type, int nu defaultConfig.flags = 4; std::shared_ptr defaultBackend(mRuntimes[DEFAULT_BACKUP_RUNTIME_KEY]->onCreate(&defaultConfig)); mAttr->constantBackend = defaultBackend; + _refreshRuntime(); } Executor::~Executor(){ // Do nothing @@ -205,15 +207,38 @@ std::shared_ptr Executor::newExecutor(MNNForwardType type, auto executor = new Executor(runtime, type, numberThread); return std::shared_ptr(executor); } +void Executor::_refreshRuntime() { + mRuntimeInfo.first.clear(); + mRuntimeInfo.second = mRuntimes[DEFAULT_BACKUP_RUNTIME_KEY]; + auto firstIter = mRuntimes.find(getAttr()->firstType); + if (firstIter != mRuntimes.end()) { + mRuntimeInfo.first.insert(std::make_pair(firstIter->first.first, firstIter->second)); + } else { + MNN_ASSERT(false); + } + for (auto& iter : mRuntimes) { + if (iter.first.first != getAttr()->firstType.first) { + mRuntimeInfo.first.insert(std::make_pair(iter.first.first, iter.second)); + } + } +} RuntimeInfo Executor::getRuntime() { - RuntimeInfo info; auto glo = ExecutorScope::Current(); - info.second = glo->mRuntimes[DEFAULT_BACKUP_RUNTIME_KEY]; - for (auto& iter : glo->mRuntimes) { - info.first.insert(std::make_pair(iter.first.first, iter.second)); + return glo->mRuntimeInfo; +} +bool Executor::getComputeInfo(EXPRP expr, Interpreter::SessionInfoCode code, void* ptr) { + if (nullptr == expr) { + return false; } - return info; + if (nullptr == expr->inside()->mCache.get()) { + return false; + } + auto session = expr->inside()->mCache->getSession(); + if (nullptr == session) { + return false; + } + return session->getInfo(code, ptr); } static bool loadCache(std::shared_ptr &rt, const void* buffer, size_t size) { @@ -352,6 +377,7 @@ Executor::RuntimeManager* Executor::RuntimeManager::createRuntimeManager(const S } else { res->mInside->mUserConfig = false; } + glo->_refreshRuntime(); return res; } ExecutorAttr* Executor::getAttr() const { @@ -603,6 +629,7 @@ void Executor::_makeCache(const std::vector& expr, bool forceCPU) { scheduleInfo.pipelineInfo[0].first.info.type = MNN_FORWARD_CPU; } else { scheduleInfo.pipelineInfo[0].first.info.type = current->getAttr()->firstType.first; + scheduleInfo.pipelineInfo[0].first.info.numThread = current->getAttr()->firstType.second; } scheduleInfo.pipelineInfo[0].first.needComputeShape = false; scheduleInfo.pipelineInfo[0].first.needComputeGeometry = mLazyMode != LAZY_CONTENT; diff --git a/include/MNN/Interpreter.hpp b/include/MNN/Interpreter.hpp index 330bc798..3557cf3a 100644 --- a/include/MNN/Interpreter.hpp +++ b/include/MNN/Interpreter.hpp @@ -343,6 +343,9 @@ public: /** Resize Info, int*, 0: ready to execute, 1: need malloc, 2: need resize */ RESIZE_STATUS = 3, + + /** Mode / NumberThread, int* */ + THREAD_NUMBER = 4, ALL }; diff --git a/include/MNN/MNNDefine.h b/include/MNN/MNNDefine.h index 9f2abed4..b423ed0f 100644 --- a/include/MNN/MNNDefine.h +++ b/include/MNN/MNNDefine.h @@ -69,6 +69,6 @@ MNN_ERROR("Check failed: %s ==> %s\n", #success, #log); \ #define STR(x) STR_IMP(x) #define MNN_VERSION_MAJOR 2 #define MNN_VERSION_MINOR 5 -#define MNN_VERSION_PATCH 0 +#define MNN_VERSION_PATCH 1 #define MNN_VERSION STR(MNN_VERSION_MAJOR) "." STR(MNN_VERSION_MINOR) "." STR(MNN_VERSION_PATCH) #endif /* MNNDefine_h */ diff --git a/include/MNN/expr/Executor.hpp b/include/MNN/expr/Executor.hpp index 630247ef..3e793f89 100644 --- a/include/MNN/expr/Executor.hpp +++ b/include/MNN/expr/Executor.hpp @@ -133,11 +133,15 @@ public: friend class StaticModule; RuntimeManager(); }; + static bool getComputeInfo(EXPRP expr, Interpreter::SessionInfoCode code, void* ptr); private: + void _refreshRuntime(); Executor(std::shared_ptr backend, MNNForwardType type, int numberThread); void _makeCache(const std::vector& outputs, bool forceCPU); + // TODO: Remove mRuntimes, only use mRuntimeInfo std::map, std::shared_ptr> mRuntimes; + RuntimeInfo mRuntimeInfo; std::shared_ptr mDebug; std::map> mSubGraph; LazyMode mLazyMode = LAZY_FULL; diff --git a/project/ios/MNN.xcodeproj/project.pbxproj b/project/ios/MNN.xcodeproj/project.pbxproj index 00f0bcae..618d066b 100644 --- a/project/ios/MNN.xcodeproj/project.pbxproj +++ b/project/ios/MNN.xcodeproj/project.pbxproj @@ -3953,7 +3953,7 @@ CODE_SIGN_STYLE = Automatic; DEAD_CODE_STRIPPING = YES; DEFINES_MODULE = YES; - DEVELOPMENT_TEAM = Q48UX93J22; + DEVELOPMENT_TEAM = 6G7464HHUS; DYLIB_COMPATIBILITY_VERSION = 1; DYLIB_CURRENT_VERSION = 1; DYLIB_INSTALL_NAME_BASE = "@rpath"; @@ -3971,6 +3971,7 @@ "ENABLE_ARMV82=1", "MNN_COREML_ENABLED=1", "USE_LZ4_FLAG=1", + "MNN_USE_SPARSE_COMPUTE=1", ); GCC_SYMBOLS_PRIVATE_EXTERN = YES; GCC_WARN_SHADOW = NO; @@ -3995,7 +3996,7 @@ METAL_LIBRARY_FILE_BASE = mnn; ONLY_ACTIVE_ARCH = YES; OTHER_CFLAGS = ""; - PRODUCT_BUNDLE_IDENTIFIER = jiuqi.bbbbb.test; + PRODUCT_BUNDLE_IDENTIFIER = com.taobao.mnn.playground.abcd; PRODUCT_NAME = "$(TARGET_NAME:c99extidentifier)"; PROVISIONING_PROFILE_SPECIFIER = ""; "PROVISIONING_PROFILE_SPECIFIER[sdk=macosx*]" = ""; @@ -4016,7 +4017,7 @@ CODE_SIGN_STYLE = Automatic; DEAD_CODE_STRIPPING = YES; DEFINES_MODULE = YES; - DEVELOPMENT_TEAM = Q48UX93J22; + DEVELOPMENT_TEAM = 6G7464HHUS; DYLIB_COMPATIBILITY_VERSION = 1; DYLIB_CURRENT_VERSION = 1; DYLIB_INSTALL_NAME_BASE = "@rpath"; @@ -4033,6 +4034,7 @@ "ENABLE_ARMV82=1", "MNN_COREML_ENABLED=1", "USE_LZ4_FLAG=1", + "MNN_USE_SPARSE_COMPUTE=1", ); GCC_SYMBOLS_PRIVATE_EXTERN = YES; GCC_WARN_SHADOW = YES; @@ -4056,7 +4058,7 @@ MACH_O_TYPE = staticlib; METAL_LIBRARY_FILE_BASE = mnn; OTHER_CFLAGS = ""; - PRODUCT_BUNDLE_IDENTIFIER = jiuqi.bbbbb.test; + PRODUCT_BUNDLE_IDENTIFIER = com.taobao.mnn.playground.abcd; PRODUCT_NAME = "$(TARGET_NAME:c99extidentifier)"; PROVISIONING_PROFILE_SPECIFIER = ""; "PROVISIONING_PROFILE_SPECIFIER[sdk=macosx*]" = ""; @@ -4075,7 +4077,7 @@ ASSETCATALOG_COMPILER_APPICON_NAME = AppIcon; ASSETCATALOG_COMPILER_LAUNCHIMAGE_NAME = LaunchImage; CODE_SIGN_STYLE = Automatic; - DEVELOPMENT_TEAM = Q48UX93J22; + DEVELOPMENT_TEAM = 6G7464HHUS; GCC_ENABLE_CPP_EXCEPTIONS = NO; GCC_ENABLE_CPP_RTTI = NO; HEADER_SEARCH_PATHS = ( @@ -4088,7 +4090,7 @@ IPHONEOS_DEPLOYMENT_TARGET = 9.0; LD_RUNPATH_SEARCH_PATHS = "$(inherited) @executable_path/Frameworks"; OTHER_CPLUSPLUSFLAGS = "$(OTHER_CFLAGS)"; - PRODUCT_BUNDLE_IDENTIFIER = jiuqi.bbbbb.test; + PRODUCT_BUNDLE_IDENTIFIER = com.taobao.mnn.playground.abcd; PRODUCT_NAME = "$(TARGET_NAME)"; TARGETED_DEVICE_FAMILY = "1,2"; }; @@ -4100,7 +4102,7 @@ ASSETCATALOG_COMPILER_APPICON_NAME = AppIcon; ASSETCATALOG_COMPILER_LAUNCHIMAGE_NAME = LaunchImage; CODE_SIGN_STYLE = Automatic; - DEVELOPMENT_TEAM = Q48UX93J22; + DEVELOPMENT_TEAM = 6G7464HHUS; GCC_ENABLE_CPP_EXCEPTIONS = NO; GCC_ENABLE_CPP_RTTI = NO; HEADER_SEARCH_PATHS = ( @@ -4113,7 +4115,7 @@ IPHONEOS_DEPLOYMENT_TARGET = 9.0; LD_RUNPATH_SEARCH_PATHS = "$(inherited) @executable_path/Frameworks"; OTHER_CPLUSPLUSFLAGS = "$(OTHER_CFLAGS)"; - PRODUCT_BUNDLE_IDENTIFIER = jiuqi.bbbbb.test; + PRODUCT_BUNDLE_IDENTIFIER = com.taobao.mnn.playground.abcd; PRODUCT_NAME = "$(TARGET_NAME)"; TARGETED_DEVICE_FAMILY = "1,2"; }; diff --git a/pymnn/pip_package/build_deps.py b/pymnn/pip_package/build_deps.py index d3d41127..e0c93570 100644 --- a/pymnn/pip_package/build_deps.py +++ b/pymnn/pip_package/build_deps.py @@ -23,6 +23,10 @@ USE_TRT=False if len(sys.argv) > 1 and sys.argv[1] == '-trt': USE_TRT=True +USE_CUDA=False +if len(sys.argv) > 1 and sys.argv[1] == '-cuda': + USE_CUDA=True + def build_deps(): if os.path.isdir('../../schema/private'): IS_INTERNAL_BUILD = args.internal @@ -49,6 +53,7 @@ def build_deps(): -DCMAKE_LIBRARY_PATH=/usr/local/cuda/lib64/stubs/ ' if USE_TRT else ' ' extra_opts += ' -DMNN_INTERNAL=ON ' if IS_INTERNAL_BUILD else ' ' extra_opts += ' -DMNN_BUILD_TORCH=ON ' if IS_BUILD_TORCH else ' ' + extra_opts += ' -DMNN_CUDA=ON ' if USE_CUDA else ' ' os.system('cmake ' + extra_opts + '-DMNN_BUILD_CONVERTER=on -DMNN_BUILD_TRAIN=ON -DCMAKE_BUILD_TYPE=Release \ -DMNN_BUILD_SHARED_LIBS=OFF -DMNN_AAPL_FMWK=OFF -DMNN_SEP_BUILD=OFF -DMNN_BUILD_OPENCV=ON -DMNN_IMGCODECS=ON \ diff --git a/pymnn/pip_package/build_manylinux2014.sh b/pymnn/pip_package/build_manylinux2014.sh index 5327f9d2..3ce43fa3 100755 --- a/pymnn/pip_package/build_manylinux2014.sh +++ b/pymnn/pip_package/build_manylinux2014.sh @@ -9,6 +9,10 @@ set -e +echo "clean build cache:" +echo ">>> rm -rf build dist *.egg-info wheelhouse/*" +rm -rf build dist *.egg-info wheelhouse/* + PROJECT_ROOT=$(cd `dirname $0`;cd ../../;pwd) echo $PROJECT_ROOT export PROJECT_ROOT @@ -17,6 +21,8 @@ for PYBIN in /opt/python/*/bin; do "${PYBIN}/pip" install -U numpy if [ "$1" == "-trt" ]; then USE_TRT=true "${PYBIN}/python" setup.py bdist_wheel + elif [ "$1" == "-cuda" ]; then + USE_CUDA=true "${PYBIN}/python" setup.py bdist_wheel else "${PYBIN}/python" setup.py bdist_wheel fi @@ -26,6 +32,8 @@ done for whl in dist/*.whl; do if [ "$1" == "-trt" ]; then LD_LIBRARY_PATH=${PROJECT_ROOT}/pymnn_build/source/backend/tensorrt:$LD_LIBRARY_PATH auditwheel repair "$whl" --plat manylinux2014_x86_64 -w wheelhouse/ + elif [ "$1" == "-cuda" ]; then + LD_LIBRARY_PATH=${PROJECT_ROOT}/pymnn_build/source/backend/cuda:$LD_LIBRARY_PATH auditwheel repair "$whl" --plat manylinux2014_x86_64 -w wheelhouse/ else auditwheel repair "$whl" --plat manylinux2014_x86_64 -w wheelhouse/ fi diff --git a/pymnn/pip_package/setup.py b/pymnn/pip_package/setup.py index a932a269..95557c08 100644 --- a/pymnn/pip_package/setup.py +++ b/pymnn/pip_package/setup.py @@ -59,9 +59,11 @@ def report(*args): package_name = 'MNN' USE_TRT=check_env_flag('USE_TRT') +USE_CUDA = check_env_flag("USE_CUDA") IS_INTERNAL_BUILD = False print ("USE_TRT ", USE_TRT) +print("USE_CUDA:", USE_CUDA) if os.path.isdir('../../schema/private'): IS_INTERNAL_BUILD = args.serving @@ -149,7 +151,8 @@ def configure_extension_build(): engine_library_dirs += [os.path.join(root_dir, BUILD_DIR, "tools", "train")] engine_library_dirs += [os.path.join(root_dir, BUILD_DIR, "tools", "cv")] engine_library_dirs += [os.path.join(root_dir, BUILD_DIR, "source", "backend", "tensorrt")] - if USE_TRT: + engine_library_dirs += [os.path.join(root_dir, BUILD_DIR, "source", "backend", "cuda")] + if USE_TRT or USE_CUDA: # Note: TensorRT-5.1.5.0/lib should be set in $LIBRARY_PATH of the build system. engine_library_dirs += ['/usr/local/cuda/lib64/'] @@ -187,6 +190,7 @@ def configure_extension_build(): engine_include_dirs += [np.get_include()] trt_depend = ['-lTRT_CUDA_PLUGIN', '-lnvinfer', '-lnvparsers', '-lnvinfer_plugin', '-lcudart'] + cuda_depend = ['-lMNN_Cuda_Main'] engine_depend = ['-lMNN'] # enable logging & model authentication on linux. @@ -196,12 +200,16 @@ def configure_extension_build(): if USE_TRT: engine_depend += trt_depend + if USE_CUDA: + engine_depend += cuda_depend + tools_compile_args = [] tools_libraries = [] tools_depend = ['-lMNN', '-lMNNConvertDeps', '-lprotobuf'] tools_library_dirs = [os.path.join(root_dir, BUILD_DIR)] tools_library_dirs += [os.path.join(root_dir, BUILD_DIR, "tools", "converter")] tools_library_dirs += [os.path.join(root_dir, BUILD_DIR, "source", "backend", "tensorrt")] + tools_library_dirs += [os.path.join(root_dir, BUILD_DIR, "source", "backend", "cuda")] tools_library_dirs += [os.path.join(root_dir, BUILD_DIR, "3rd_party", "protobuf", "cmake")] # add libTorch dependency @@ -227,7 +235,7 @@ def configure_extension_build(): os.path.join(torch_lib, 'libc10.dylib')]), ('.dylibs', [os.path.join(torch_path, '.dylibs', 'libiomp5.dylib')])] ''' - if USE_TRT: + if USE_TRT or USE_CUDA: # Note: TensorRT-5.1.5.0/lib should be set in $LIBRARY_PATH of the build system. tools_library_dirs += ['/usr/local/cuda/lib64/'] @@ -269,6 +277,9 @@ def configure_extension_build(): if USE_TRT: tools_depend += trt_depend + if USE_CUDA: + tools_depend += cuda_depend + if IS_DARWIN: engine_link_args += ['-stdlib=libc++'] engine_link_args += ['-Wl,-all_load'] diff --git a/schema/current/CaffeOp_generated.h b/schema/current/CaffeOp_generated.h index 1b2459f8..ad6e17c7 100644 --- a/schema/current/CaffeOp_generated.h +++ b/schema/current/CaffeOp_generated.h @@ -942,6 +942,9 @@ struct IDSTQuanT : public flatbuffers::NativeTable { int32_t aMin; int32_t readType; bool has_scaleInt; + bool shapeInt32; + uint32_t weightSize; + std::vector index; IDSTQuanT() : type(0), useInt32(false), @@ -951,7 +954,9 @@ struct IDSTQuanT : public flatbuffers::NativeTable { aMax(0), aMin(0), readType(0), - has_scaleInt(false) { + has_scaleInt(false), + shapeInt32(false), + weightSize(0) { } }; @@ -993,6 +998,15 @@ struct IDSTQuan FLATBUFFERS_FINAL_CLASS : private flatbuffers::Table { bool has_scaleInt() const { return GetField(24, 0) != 0; } + bool shapeInt32() const { + return GetField(26, 0) != 0; + } + uint32_t weightSize() const { + return GetField(28, 0); + } + const flatbuffers::Vector *index() const { + return GetPointer *>(30); + } bool Verify(flatbuffers::Verifier &verifier) const { return VerifyTableStart(verifier) && VerifyOffset(verifier, 4) && @@ -1008,6 +1022,10 @@ struct IDSTQuan FLATBUFFERS_FINAL_CLASS : private flatbuffers::Table { VerifyField(verifier, 20) && VerifyField(verifier, 22) && VerifyField(verifier, 24) && + VerifyField(verifier, 26) && + VerifyField(verifier, 28) && + VerifyOffset(verifier, 30) && + verifier.VerifyVector(index()) && verifier.EndTable(); } IDSTQuanT *UnPack(const flatbuffers::resolver_function_t *_resolver = nullptr) const; @@ -1051,6 +1069,15 @@ struct IDSTQuanBuilder { void add_has_scaleInt(bool has_scaleInt) { fbb_.AddElement(24, static_cast(has_scaleInt), 0); } + void add_shapeInt32(bool shapeInt32) { + fbb_.AddElement(26, static_cast(shapeInt32), 0); + } + void add_weightSize(uint32_t weightSize) { + fbb_.AddElement(28, weightSize, 0); + } + void add_index(flatbuffers::Offset> index) { + fbb_.AddOffset(30, index); + } explicit IDSTQuanBuilder(flatbuffers::FlatBufferBuilder &_fbb) : fbb_(_fbb) { start_ = fbb_.StartTable(); @@ -1075,8 +1102,13 @@ inline flatbuffers::Offset CreateIDSTQuan( int32_t aMax = 0, int32_t aMin = 0, int32_t readType = 0, - bool has_scaleInt = false) { + bool has_scaleInt = false, + bool shapeInt32 = false, + uint32_t weightSize = 0, + flatbuffers::Offset> index = 0) { IDSTQuanBuilder builder_(_fbb); + builder_.add_index(index); + builder_.add_weightSize(weightSize); builder_.add_readType(readType); builder_.add_aMin(aMin); builder_.add_aMax(aMax); @@ -1086,6 +1118,7 @@ inline flatbuffers::Offset CreateIDSTQuan( builder_.add_type(type); builder_.add_alpha(alpha); builder_.add_buffer(buffer); + builder_.add_shapeInt32(shapeInt32); builder_.add_has_scaleInt(has_scaleInt); builder_.add_useInt32(useInt32); return builder_.Finish(); @@ -4390,6 +4423,9 @@ inline void IDSTQuan::UnPackTo(IDSTQuanT *_o, const flatbuffers::resolver_functi { auto _e = aMin(); _o->aMin = _e; }; { auto _e = readType(); _o->readType = _e; }; { auto _e = has_scaleInt(); _o->has_scaleInt = _e; }; + { auto _e = shapeInt32(); _o->shapeInt32 = _e; }; + { auto _e = weightSize(); _o->weightSize = _e; }; + { auto _e = index(); if (_e) { _o->index.resize(_e->size()); for (flatbuffers::uoffset_t _i = 0; _i < _e->size(); _i++) { _o->index[_i] = _e->Get(_i); } } }; } inline flatbuffers::Offset IDSTQuan::Pack(flatbuffers::FlatBufferBuilder &_fbb, const IDSTQuanT* _o, const flatbuffers::rehasher_function_t *_rehasher) { @@ -4411,6 +4447,9 @@ inline flatbuffers::Offset CreateIDSTQuan(flatbuffers::FlatBufferBuild auto _aMin = _o->aMin; auto _readType = _o->readType; auto _has_scaleInt = _o->has_scaleInt; + auto _shapeInt32 = _o->shapeInt32; + auto _weightSize = _o->weightSize; + auto _index = _o->index.size() ? _fbb.CreateVector(_o->index) : 0; return MNN::CreateIDSTQuan( _fbb, _buffer, @@ -4423,7 +4462,10 @@ inline flatbuffers::Offset CreateIDSTQuan(flatbuffers::FlatBufferBuild _aMax, _aMin, _readType, - _has_scaleInt); + _has_scaleInt, + _shapeInt32, + _weightSize, + _index); } inline QuantizedFloatParamT *QuantizedFloatParam::UnPack(const flatbuffers::resolver_function_t *_resolver) const { @@ -5908,7 +5950,10 @@ inline const flatbuffers::TypeTable *IDSTQuanTypeTable() { { flatbuffers::ET_INT, 0, -1 }, { flatbuffers::ET_INT, 0, -1 }, { flatbuffers::ET_INT, 0, -1 }, - { flatbuffers::ET_BOOL, 0, -1 } + { flatbuffers::ET_BOOL, 0, -1 }, + { flatbuffers::ET_BOOL, 0, -1 }, + { flatbuffers::ET_UINT, 0, -1 }, + { flatbuffers::ET_UINT, 1, -1 } }; static const char * const names[] = { "buffer", @@ -5921,10 +5966,13 @@ inline const flatbuffers::TypeTable *IDSTQuanTypeTable() { "aMax", "aMin", "readType", - "has_scaleInt" + "has_scaleInt", + "shapeInt32", + "weightSize", + "index" }; static const flatbuffers::TypeTable tt = { - flatbuffers::ST_TABLE, 11, type_codes, nullptr, nullptr, names + flatbuffers::ST_TABLE, 14, type_codes, nullptr, nullptr, names }; return &tt; } diff --git a/schema/default/CaffeOp.fbs b/schema/default/CaffeOp.fbs index 0eca42f4..631b3d00 100644 --- a/schema/default/CaffeOp.fbs +++ b/schema/default/CaffeOp.fbs @@ -65,6 +65,10 @@ table IDSTQuan { aMin:int; readType:int; has_scaleInt:bool; + shapeInt32:bool = false; + // For sparse + weightSize:uint32; + index:[uint32]; } enum QuantizeAlgo : byte { diff --git a/source/backend/cpu/BinaryUtils.hpp b/source/backend/cpu/BinaryUtils.hpp index 00d9d514..dc1a442d 100644 --- a/source/backend/cpu/BinaryUtils.hpp +++ b/source/backend/cpu/BinaryUtils.hpp @@ -263,100 +263,6 @@ void executeVec(void* outputRaw, const void* inputRaw0, const void* inputRaw1, i } } -template -void executeVecInt8(int8_t* outputRaw, const int8_t* inputRaw0, const int8_t* inputRaw1, const float* inputScale0, const float* inputScale1, const float* outputScale, int elementSize, int needBroadcast) { - Func compute; - int sizeDivUnit = elementSize / pack; - int remainCount = elementSize - sizeDivUnit * pack; -#ifdef MNN_USE_NEON - sizeDivUnit = (elementSize * 4) / pack; - remainCount = (elementSize * 4) - sizeDivUnit * pack; -#endif - auto src0 = inputRaw0; - auto src1 = inputRaw1; - auto dst = (int8_t*)outputRaw; -#ifdef MNN_USE_SSE - V zeroPointV((uint8_t)(128)); -#else - V zeroPointV((uint8_t)(0)); -#endif - if (-1 == needBroadcast) { - if (sizeDivUnit > 0) { - for (int i = 0; i < sizeDivUnit; ++i) { - V a = V::load(src0); - a -= zeroPointV; - V b = V::load(src1); - b -= zeroPointV; - V::save(dst, compute(a, b) + zeroPointV); - src0 += pack; - src1 += pack; - dst += pack; - } - } - if (remainCount > 0) { - int8_t tempSrc0[pack]; - int8_t tempSrc1[pack]; - int8_t tempDst[pack]; - ::memcpy(tempSrc0, src0, remainCount * sizeof(int8_t)); - ::memcpy(tempSrc1, src1, remainCount * sizeof(int8_t)); - V a = V::load(tempSrc0); - a -= zeroPointV; - V b = V::load(tempSrc1); - b -= zeroPointV; - V::save(tempDst, compute(a, b) + zeroPointV); - ::memcpy(dst, tempDst, remainCount * sizeof(int8_t)); - } - } else if (0 == needBroadcast) { - const int8_t srcValue0 = src0[0]; - V a = V(srcValue0); - a -= zeroPointV; - if (sizeDivUnit > 0) { - for (int i = 0; i < sizeDivUnit; ++i) { - const auto src1Ptr = src1; - auto dstPtr = dst; - V b = V::load(src1Ptr); - b -= zeroPointV; - V::save(dstPtr, compute(a, b) + zeroPointV); - src1 += pack; - dst += pack; - } - } - if (remainCount > 0) { - int8_t tempSrc1[pack]; - int8_t tempDst[pack]; - ::memcpy(tempSrc1, src1, remainCount * sizeof(int8_t)); - V b = V::load(tempSrc1); - b -= zeroPointV; - V::save(tempDst, compute(a, b) + zeroPointV); - ::memcpy(dst, tempDst, remainCount * sizeof(int8_t)); - } - } else { - const int8_t srcValue1 = src1[0]; - V b = V(srcValue1); - b -= zeroPointV; - if (sizeDivUnit > 0) { - for (int i = 0; i < sizeDivUnit; ++i) { - const auto src0Ptr = src0; - auto dstPtr = dst; - V a = V::load(src0Ptr); - a -= zeroPointV; - V::save(dstPtr, compute(a, b) + zeroPointV); - src0 += pack; - dst += pack; - } - } - if (remainCount > 0) { - int8_t tempSrc0[pack]; - int8_t tempDst[pack]; - ::memcpy(tempSrc0, src0, remainCount * sizeof(int8_t)); - V a = V::load(tempSrc0); - a -= zeroPointV; - V::save(tempDst, compute(a, b) +zeroPointV); - ::memcpy(dst, tempDst, remainCount * sizeof(int8_t)); - } - } -} - template struct VecBinaryAdd { Vec operator()(Vec& x, Vec& y) const { @@ -426,43 +332,49 @@ void execute(void* outputRaw, const void* inputRaw0, const void* inputRaw1, int template void executeInt8(int8_t* outputRaw, const int8_t* inputRaw0, const int8_t* inputRaw1, const float* inputScale0, const float* inputScale1, const float* outputScale, int elementSize, int needBroadcast) { Func f; - int input0DataCount = elementSize; - int input1DataCount = elementSize; + int size = elementSize; #ifdef MNN_USE_NEON - input0DataCount = elementSize * 4; - input1DataCount = elementSize * 4; + size *= 4; #endif - const Tin* input0Data = (const Tin*)inputRaw0; - const Tin* input1Data = (const Tin*)inputRaw1; - Tout* outputData = (Tout*)outputRaw; - + float inp0 = 0, inp1 = 0, output = 0; #ifdef MNN_USE_SSE - const uint8_t zeroPoint = 128; + const int zeroPoint = 128; + const int maxValue = 255; + const int minValue = 0; + const uint8_t* inputData0 = (uint8_t*)inputRaw0; + const uint8_t* inputData1 = (uint8_t*)inputRaw1; + uint8_t* outputData = (uint8_t*)outputRaw; #else - const uint8_t zeroPoint = 0; + const int zeroPoint = 0; + const int maxValue = 127; + const int minValue = -128; + const int8_t* inputData0 = (int8_t*)inputRaw0; + const int8_t* inputData1 = (int8_t*)inputRaw1; + int8_t* outputData = (int8_t*)outputRaw; #endif - if (needBroadcast == 0) { // data count == 1, not only mean scalar input, maybe of shape (1, 1, 1, ...,1) - for (int i = 0; i < input1DataCount; i++) { - inp0 = static_cast((int8_t)(inputRaw0[0] - zeroPoint)) * inputScale0[i]; - inp1 = static_cast((int8_t)(inputRaw1[i] - zeroPoint)) * inputScale1[i]; + for (int i = 0; i < size; ++i) { + if (needBroadcast == 0) { + inp0 = (inputData0[0]- zeroPoint) * inputScale0[i]; + inp1 = (inputData1[i]- zeroPoint) * inputScale1[i]; output = f(inp0, inp1); - outputData[i] = (Tout)(output * outputScale[i] + zeroPoint); - } - } else if (needBroadcast == 1) { - for (int i = 0; i < input0DataCount; i++) { - inp0 = static_cast((int8_t)(inputRaw0[i] - zeroPoint)) * inputScale0[i]; - inp1 = static_cast((int8_t)(inputRaw1[0] - zeroPoint)) * inputScale1[i]; + } else if (needBroadcast == 1) { + inp0 = (inputData0[i] - zeroPoint) * inputScale0[i]; + inp1 = (inputData1[0] - zeroPoint) * inputScale1[i]; output = f(inp0, inp1); - outputData[i] = (Tout)(output * outputScale[i] + zeroPoint); - } - } else { // both input contains more than one element,which means no scalar input - for (int i = 0; i < input0DataCount; i++) { - inp0 = static_cast((int8_t)(inputRaw0[i] - zeroPoint)) * inputScale0[i]; - inp1 = static_cast((int8_t)(inputRaw1[i] - zeroPoint)) * inputScale1[i]; + } else { + inp0 = (inputData0[i] - zeroPoint) * inputScale0[i]; + inp1 = (inputData1[i] - zeroPoint) * inputScale1[i]; output = f(inp0, inp1); - outputData[i] = (Tout)(output * outputScale[i] + zeroPoint); } + int value = (int)roundf(output * outputScale[i]) + zeroPoint; + if (value > maxValue) { + value = maxValue; + } + if (value < minValue) { + value = minValue; + } + outputData[i] = value; } } diff --git a/source/backend/cpu/CPUBackend.cpp b/source/backend/cpu/CPUBackend.cpp index bef26213..474e8198 100644 --- a/source/backend/cpu/CPUBackend.cpp +++ b/source/backend/cpu/CPUBackend.cpp @@ -104,9 +104,16 @@ float CPURuntime::onGetMemoryInMB() { auto staticMemoryInMB = mStaticAllocator->totalSize() / 1024.0f / 1024.0f; return staticMemoryInMB; } - - - +bool CPURuntime::onCheckInfo(Backend::Info& info) const { +#ifdef MNN_USE_THREAD_POOL + int threadNumber = mThreadNumber; + if (mTaskIndex < 0) { + threadNumber = 1; + } + info.numThread = threadNumber; +#endif + return true; +} Backend* CPURuntime::onCreate(const BackendConfig* config) const { auto precision = mPrecision; diff --git a/source/backend/cpu/CPUBackend.hpp b/source/backend/cpu/CPUBackend.hpp index cf8854bf..bb60d7c5 100644 --- a/source/backend/cpu/CPUBackend.hpp +++ b/source/backend/cpu/CPUBackend.hpp @@ -31,6 +31,8 @@ public: } void onConcurrencyBegin() const; void onConcurrencyEnd() const; + virtual bool onCheckInfo(Backend::Info& info) const override; + private: std::shared_ptr mStaticAllocator; diff --git a/source/backend/cpu/CPUBinaryInt8.cpp b/source/backend/cpu/CPUBinaryInt8.cpp index 3f29ee36..285d9c59 100644 --- a/source/backend/cpu/CPUBinaryInt8.cpp +++ b/source/backend/cpu/CPUBinaryInt8.cpp @@ -35,13 +35,12 @@ ErrorCode CPUBinaryInt8::onResize(const std::vector& inputs, const std: } MNN_ASSERT(mTotalSize == ((CPUBackend*)backend())->getTensorSize(outputs[0])); - std::vector scale0(mTotalSize), scale1(mTotalSize), outputScale(mTotalSize); - std::fill(scale0.begin(), scale0.end(), TensorUtils::getDescribe(inputs[0])->quantAttr->scale); - std::fill(scale1.begin(), scale1.end(), TensorUtils::getDescribe(inputs[1])->quantAttr->scale); - std::fill(outputScale.begin(), outputScale.end(), 1 / TensorUtils::getDescribe(outputs[0])->quantAttr->scale); - mInputQuant0 = scale0; - mInputQuant1 = scale1; - mOutputQuant = outputScale; + mInputQuant0.resize(mTotalSize); + mInputQuant1.resize(mTotalSize); + mOutputQuant.resize(mTotalSize); + std::fill(mInputQuant0.begin(), mInputQuant0.end(), TensorUtils::getDescribe(inputs[0])->quantAttr->scale); + std::fill(mInputQuant1.begin(), mInputQuant1.end(), TensorUtils::getDescribe(inputs[1])->quantAttr->scale); + std::fill(mOutputQuant.begin(), mOutputQuant.end(), 1 / TensorUtils::getDescribe(outputs[0])->quantAttr->scale); if(mActivationType == 1 && outputs[0]->getType().code == halide_type_float) { mActivationExe.reset(new CPURelu(backend(), 0.0)); @@ -56,15 +55,10 @@ ErrorCode CPUBinaryInt8::onExecute(const std::vector& inputs, const std auto output = outputs[0]; auto schedule = ((CPUBackend*)backend())->multiThreadDivide(mTotalSize); -#ifdef MNN_USE_SSE - auto input0Ptr = input->host(); - auto input1Ptr = input1->host(); - auto outputPtr = outputs[0]->host(); -#else + auto input0Ptr = input->host(); auto input1Ptr = input1->host(); auto outputPtr = outputs[0]->host(); -#endif int inpBytes = 1; int outBytes = 1; @@ -90,7 +84,7 @@ ErrorCode CPUBinaryInt8::onExecute(const std::vector& inputs, const std #ifdef MNN_USE_NEON mProc(out, inp0, inp1, scale0, scale1, scaleDst, realSize / 4, mNeedBroadcastIndex); #else - mProc((int8_t*)out, (int8_t*)inp0, (int8_t*)inp1, scale0, scale1, scaleDst, realSize, mNeedBroadcastIndex); + mProc(out, inp0, inp1, scale0, scale1, scaleDst, realSize, mNeedBroadcastIndex); #endif } } diff --git a/source/backend/cpu/CPUConvolution.hpp b/source/backend/cpu/CPUConvolution.hpp index 4a4d151a..89a9b0e9 100644 --- a/source/backend/cpu/CPUConvolution.hpp +++ b/source/backend/cpu/CPUConvolution.hpp @@ -40,19 +40,21 @@ public: }; class CPUConvolution : public Execution { public: + struct ResourceDequantizeInfo { + int bits = 32; + std::shared_ptr mScaleBias; + std::vector mLowBitWeightMap; + }; struct Resource { std::shared_ptr mWeight; std::shared_ptr mBias; + ResourceDequantizeInfo mDequantize; Backend* backend; bool copyBiasAlign(const float* bias, int outputCount); - ~ Resource() { - if (nullptr != mBias) { - backend->onReleaseBuffer(mBias.get(), Backend::STATIC); - } - if (nullptr != mWeight) { - backend->onReleaseBuffer(mWeight.get(), Backend::STATIC); - } - } + int hU; + int lU; + int lP; + int hP; }; struct ResourceInt8 { std::vector mInt8WeightKernelSum; diff --git a/source/backend/cpu/compute/CommonOptFunction.cpp b/source/backend/cpu/compute/CommonOptFunction.cpp index 4d56ea4a..a005542e 100644 --- a/source/backend/cpu/compute/CommonOptFunction.cpp +++ b/source/backend/cpu/compute/CommonOptFunction.cpp @@ -19,7 +19,6 @@ #include #include "../CPURuntime.hpp" #include "common/MemoryFormater.h" -#include "common/CommonCompute.hpp" // TODO: Find better way to optimize it #include "../CPUBinary.hpp" #include "../CPUUnary.hpp" @@ -174,107 +173,6 @@ void MNNUnpackC2Common(T* dst, const T* src, size_t area, size_t depth, int* are } } -/* - source: source matrix is h x l - transpose: if false, export compressed matrix as h x l, other export as l x h. - */ -void MNNPackForSparseMatMul_B(float* dest, unsigned int* NNZMap, int* dataOffsetMap, int sparseBlockOC, const float* source, size_t h, size_t l, const int eP, bool transpose) { - // 1. in convolution, source B layout is OC x (KH * KW * IC), - // the dest layout of weight is BCSC(block compressed sparse colum) format, which is OC(!=0) x (KH*KW*IC!=0), as a canceled result, just do BCSR, transpose should be false. - // 2. in ordinary sparse MatMul, transpose is corresponding to BCSR or BCSC - - // BCSR - if (transpose) { - int rowOffset = 0; - for (int i = 0; i < l; i += 1) { - *NNZMap = 0; - for(int j = 0; j < h; j += sparseBlockOC) { - if(!MNN::CommonCompute::checkAllZeros(source + j * l + i, l, sparseBlockOC, 1)) { - *dest = *(source + j * l + l); - dest++; - *NNZMap = *NNZMap + 1; - *dataOffsetMap = rowOffset; - dataOffsetMap++; - rowOffset = 0; - } - rowOffset += eP; - } - NNZMap++; - rowOffset -= h * eP; - } - } else { // BCSC - int columOffset = 0; - int i = 0; - for (; i + sparseBlockOC <= h; i += sparseBlockOC) { - *NNZMap = 0; - for(int j = 0; j < l; j += 1) { - if (!MNN::CommonCompute::checkAllZeros(source, l, sparseBlockOC, 1)) { - for (int ioc = 0; ioc < sparseBlockOC; ioc++) { - *dest = *(source + ioc * l); - dest++; - } - *NNZMap = *NNZMap + 1; - *dataOffsetMap = columOffset; - dataOffsetMap++; - columOffset = 0; - } - columOffset += eP; - source++; - } - NNZMap++; - source += l * (sparseBlockOC - 1); - columOffset -= l * eP; - } - - for (; i < h; i++) { - *NNZMap = 0; - for(int j = 0; j < l; j++) { - if (*source != 0.0f) { - *dest = *source; - dest++; - *NNZMap = *NNZMap + 1; - *dataOffsetMap = columOffset; - dataOffsetMap++; - columOffset = 0; - } - columOffset += eP; - source++; - } - NNZMap++; - columOffset -= l * eP; - } - - *dataOffsetMap = columOffset; // - } - return; -} - - -void MNNGetOptimalBlockShape(size_t& weightNNZElement, size_t& weightBlockNumber, const float* source, int sparseBlockOC, size_t h, size_t l) { - size_t nnzBlock = 0; - size_t nnzTail = 0; - int ocEven = (h / sparseBlockOC) * sparseBlockOC; - size_t ioc = 0; - for (; ioc < ocEven; ioc += sparseBlockOC) { - for (size_t i = 0; i < l; i++) { - bool isZero = MNN::CommonCompute::checkAllZeros(source, l, sparseBlockOC, 1); - nnzBlock += !isZero; - source++; - } - source += (sparseBlockOC - 1) * l; - } - for (; ioc < h; ioc++) { - for (size_t i = 0; i < l; i++) { - bool isZero = (*source) == 0.0f; - nnzTail += !isZero; - source++; - } - } - weightNNZElement = nnzBlock * sparseBlockOC + nnzTail; - weightBlockNumber = nnzBlock + nnzTail; - return; -} - #ifndef MNN_USE_NEON void MNNGetMatMulPackMode(int* eP, int *lP, int* hP) { @@ -2875,8 +2773,6 @@ void MNNCoreFunctionInit() { gCoreFunction->MNNPackedMatMulRemain = MNNPackedMatMulRemain; gCoreFunction->MNNGetSparseMatMulPackMode = MNNGetSparseMatMulPackMode; - gCoreFunction->MNNPackForSparseMatMul_B = MNNPackForSparseMatMul_B; // sparse packing B - gCoreFunction->MNNGetOptimalBlockShape = MNNGetOptimalBlockShape; gCoreFunction->MNNAdjustOptimalSparseKernel = _MNNAdjustOptimalSparseKernel; gCoreFunction->MNNComputeMatMulForE_1 = MNNComputeMatMulForE_1; @@ -2995,4 +2891,4 @@ void MNNPackC2Origin(double* dst, const double* src, size_t area, size_t depth, areaOffset, }; MNNPackC2(dst, src, area, depth, offset); -} \ No newline at end of file +} diff --git a/source/backend/cpu/compute/CommonOptFunction.h b/source/backend/cpu/compute/CommonOptFunction.h index 76f38099..80c4f60a 100644 --- a/source/backend/cpu/compute/CommonOptFunction.h +++ b/source/backend/cpu/compute/CommonOptFunction.h @@ -198,10 +198,6 @@ struct CoreFunctions { MNNBinaryExecute(*MNNSelectBinaryFunctionForFloat)(int opType); MNNUnaryExecute(*MNNSelectUnaryFunctionForFloat)(int opType, int precisionMode); - // sparse matrix multiply - void(*MNNPackForSparseMatMul_B)(float* dest, unsigned int* NNZMap, int* dataOffsetMap, int sparseBlockOC, const float* source, size_t h, size_t l, const int eP, bool transpose); - void(*MNNGetOptimalBlockShape)(size_t& weightNNZElement, size_t& weightBlockNumber, const float* source, int sparseBlockOC, size_t h, size_t l); - // B matrix is sparsed typedef void(*MNNPackedSparseMatMul)(float* C, const float* A, const float* B, size_t eSize, const size_t* parameter, const float* postParameters, const float* bias, unsigned int* NNZMap, int* dataOffsetMap); void(*MNNAdjustOptimalSparseKernel)(int& sparseBlockOC, MNNPackedSparseMatMul& packedSparseMatMul); diff --git a/source/backend/cpu/compute/ConvolutionFloatFactory.cpp b/source/backend/cpu/compute/ConvolutionFloatFactory.cpp index 1c5cbe80..2c655df1 100644 --- a/source/backend/cpu/compute/ConvolutionFloatFactory.cpp +++ b/source/backend/cpu/compute/ConvolutionFloatFactory.cpp @@ -26,29 +26,25 @@ namespace MNN { static Execution* _createUnit(const Tensor* input, const Tensor* output, Backend* backend, const Convolution2D* conv2d, const float* originWeight, size_t originWeightSize, - const float* bias, size_t biasSize) { + const float* bias, size_t biasSize, std::shared_ptr weightQuantInfo, bool supportSparse) { + auto cpuBackend = (CPUBackend*)backend; + bool lowMemory = cpuBackend->memoryMode() == BackendConfig::Memory_Low; auto common = conv2d->common(); #ifdef MNN_USE_ONEDNN return OneDNN::createConvolution(common, backend, originWeight, originWeightSize, bias, biasSize); #endif #ifdef MNN_USE_SPARSE_COMPUTE - - auto core = static_cast(backend)->functions(); - int bytes = core->bytes; -#ifdef MNN_USE_SSE - const bool onlySSENotAVX = core->pack == 4; // no backend of only sse without avx2 or avx512 -#else - const bool onlySSENotAVX = false; -#endif - if (!onlySSENotAVX && bytes == 4 && conv2d->sparseParameter()) { - if (SparseConvolutionTiledExecutor::shouldUseSparseConvolution(originWeightSize, conv2d->sparseParameter())) { - return new SparseConvolutionTiledExecutor(common, backend, originWeight, originWeightSize, + if (conv2d->sparseParameter() && nullptr != weightQuantInfo.get()) { + if (supportSparse) { + return new SparseConvolutionTiledExecutor(common, backend, weightQuantInfo->quan, conv2d->sparseParameter(), bias, biasSize); } } - #endif + if (lowMemory || originWeightSize == 0) { + return new DenseConvolutionTiledExecutor(common, backend, originWeight, originWeightSize, bias, biasSize, weightQuantInfo); + } bool fastWay = common->kernelY() == 1 && common->kernelX() == 1 && output->width() == input->width() && output->height() == input->height() && common->strideX() == 1 && common->strideY() == 1; @@ -56,16 +52,12 @@ static Execution* _createUnit(const Tensor* input, const Tensor* output, Backend return new Convolution1x1Strassen(common, backend, originWeight, originWeightSize, bias, biasSize); } if (!ConvolutionWinogradBridge::canUseWinograd(common)) { - return new DenseConvolutionTiledExecutor(common, backend, originWeight, originWeightSize, bias, biasSize); - } - auto cpuBackend = (CPUBackend*)backend; - if (cpuBackend->memoryMode() == BackendConfig::Memory_Low) { - return new DenseConvolutionTiledExecutor(common, backend, originWeight, originWeightSize, bias, biasSize); + return new DenseConvolutionTiledExecutor(common, backend, originWeight, originWeightSize, bias, biasSize, nullptr); } PerfConfig convPerfconfig = DenseConvolutionTiledExecutor::bestTileConvolutionConfig(common, input, output, cpuBackend->threadNumber(), backend); auto winogradConfig = ConvolutionWinogradBridge::bestWinogradUnit(common, input, output, cpuBackend->threadNumber(), backend, convPerfconfig); if (winogradConfig.unit <= 1) { - return new DenseConvolutionTiledExecutor(common, backend, originWeight, originWeightSize, bias, biasSize); + return new DenseConvolutionTiledExecutor(common, backend, originWeight, originWeightSize, bias, biasSize, nullptr); } return ConvolutionWinogradBridge::createWinogradImpl(common, input, output, backend, originWeight, originWeightSize, bias, biasSize, winogradConfig); @@ -78,22 +70,39 @@ Execution* ConvolutionFloatFactory::create(const std::vector& inputs, c // Multi Input return new ConvolutionTiledExecutorMultiInput(conv2d->common(), backend); } + bool lowMemory = static_cast(backend)->memoryMode() == BackendConfig::Memory_Low && static_cast(backend)->functions()->bytes == 4; const float* originWeight = nullptr; const float* originBias = nullptr; int originWeightSize = 0; int originBiasSize = 0; std::shared_ptr quanCommon; std::unique_ptr externalWeightTensor, externalBiasTensor; + bool supportSparse = false; +#ifdef MNN_USE_SPARSE_COMPUTE + auto core = static_cast(backend)->functions(); + int bytes = core->bytes; +#ifdef MNN_USE_SSE + const bool onlySSENotAVX = core->pack == 4; // no backend of only sse without avx2 or avx512 +#else + const bool onlySSENotAVX = false; +#endif + supportSparse = !onlySSENotAVX && bytes == 4; +#endif if (nullptr != conv2d->quanParameter()) { - quanCommon = ConvolutionCommon::load(conv2d->quanParameter()); + bool forceFloat = false; + if (!supportSparse && conv2d->quanParameter()->index() != nullptr) { + // The weight is storage as float sparse, but the backend don't support sparse compute, expand it + forceFloat = true; + } + quanCommon = ConvolutionCommon::load(conv2d->quanParameter(), forceFloat, lowMemory); if (nullptr == quanCommon) { MNN_ERROR("Memory not Enough, can't extract IDST Convolution: %s \n", op->name()->c_str()); return nullptr; } - if (quanCommon->weightFloat.get() == nullptr) { + if (conv2d->quanParameter()->has_scaleInt()) { if (backend->type() != MNN_FORWARD_CPU) { - // From BF16 + // From BF16 / FP16 return nullptr; } return ConvolutionIntFactory::create(inputs[0], outputs[0], op, backend, quanCommon.get()); @@ -114,7 +123,7 @@ Execution* ConvolutionFloatFactory::create(const std::vector& inputs, c return nullptr; } auto common = conv2d->common(); - if (nullptr == originWeight) { + if (nullptr == originWeight && nullptr != op->main_as_Convolution2D()->weight()) { originWeight = op->main_as_Convolution2D()->weight()->data(); originWeightSize = op->main_as_Convolution2D()->weight()->size(); } @@ -130,7 +139,7 @@ Execution* ConvolutionFloatFactory::create(const std::vector& inputs, c MNN_ASSERT(group > 0); if (1 == group) { return _createUnit(inputs[0], outputs[0], backend, conv2d, originWeight, originWeightSize, - originBias, originBiasSize); + originBias, originBiasSize, quanCommon, supportSparse); } // TODO: Use Geometry to split // Split @@ -144,7 +153,7 @@ Execution* ConvolutionFloatFactory::create(const std::vector& inputs, c for (int i = 0; i < group; ++i) { auto newConvolution = _createUnit(emptyInput.get(), emptyOutput.get(), backend, conv2d, originWeight + groupWeightSize * i, - groupWeightSize, conv2d->bias()->data() + groupOutputCount * i, groupOutputCount); + groupWeightSize, conv2d->bias()->data() + groupOutputCount * i, groupOutputCount, quanCommon, supportSparse); subConvolution.push_back(std::shared_ptr(newConvolution)); } return new ConvolutionGroup(backend, subConvolution); diff --git a/source/backend/cpu/compute/DenseConvolutionTiledExecutor.cpp b/source/backend/cpu/compute/DenseConvolutionTiledExecutor.cpp index 3b814489..b9cf3065 100644 --- a/source/backend/cpu/compute/DenseConvolutionTiledExecutor.cpp +++ b/source/backend/cpu/compute/DenseConvolutionTiledExecutor.cpp @@ -5,7 +5,7 @@ // Created by MNN on 2018/07/16. // Copyright © 2018, Alibaba Group Holding Limited // - +#include #include "DenseConvolutionTiledExecutor.hpp" #include #include "backend/cpu/CPUBackend.hpp" @@ -19,6 +19,7 @@ #include "common/MemoryFormater.h" #define PARAMETERSIZE 6 +#define MNN_ALLOC_MEMORY_INDIRECTLY using Vec4 = MNN::Math::Vec; namespace MNN { @@ -27,10 +28,86 @@ void DenseConvolutionTiledExecutor::initWeight(float *dest, const float *source, function->MNNPackForMatMul_B(dest, cache, outputCount, kernelSize * depth, true); } +static bool _initQuantizeResource(std::shared_ptr int8Info, std::shared_ptr resource, int hU, int hP, int lU, int lP, int outputCount, int srcChannel, int kernelSize) { + int weightLength = hU * lU * hP * lP; + resource->mWeight.reset(Tensor::createDevice( + {weightLength})); + auto res = resource->backend->onAcquireBuffer(resource->mWeight.get(), Backend::STATIC); + if (!res) { + return false; + } + resource->mDequantize.bits = 8; + resource->lU = lU; + resource->hU = hU; + resource->lP = lP; + resource->hP = hP; + // Reorder weight + MNN_ASSERT(lP == 1); + auto dstWInt8 = resource->mWeight->host(); + auto srcWInt8 = int8Info->weight.get(); + for (int y=0; ymDequantize.mScaleBias.reset(MNN::Tensor::createDevice({hU * hP * 2})); + res = resource->backend->onAcquireBuffer(resource->mDequantize.mScaleBias.get(), Backend::STATIC); + if (!res) { + return false; + } + auto alphaPtr = resource->mDequantize.mScaleBias->host(); + auto biasPtr = resource->mDequantize.mScaleBias->host() + hU * hP; + ::memset(alphaPtr, 0, 2 * hU * hP * sizeof(float)); + int h = int8Info->alpha.size(); + if (int8Info->asymmetric) { + h = h / 2; + for (int i=0; ialpha.get()[2 * i + 1]; + biasPtr[i] = int8Info->alpha.get()[2 * i]; + } + } else { + for (int i=0; ialpha.get()[i]; + } + } + if (int8Info->canUseInt4) { + MNN_ASSERT(weightLength % 2 == 0); + weightLength = UP_DIV(weightLength, 2); + resource->mDequantize.bits = 4; + resource->mDequantize.mLowBitWeightMap = int8Info->weightMap; + std::shared_ptr weightLow(Tensor::createDevice( + {weightLength})); + auto res = resource->backend->onAcquireBuffer(weightLow.get(), Backend::STATIC); + if (!res) { + return false; + } + auto srcPtr = resource->mWeight->host(); + auto dstPtr = weightLow->host(); + for (int i=0; iweightReverseMap[(int)s0 + 128]; + s1 = int8Info->weightReverseMap[(int)s1 + 128]; + int d = s0 * 16 + s1; + dstPtr[i] = d; + } + resource->mWeight = weightLow; + } + return true; +} DenseConvolutionTiledExecutor::DenseConvolutionTiledExecutor(const Convolution2DCommon* common, Backend* b, const float* originWeight, size_t originWeightSize, - const float* bias, size_t biasSize) + const float* bias, size_t biasSize, std::shared_ptr int8Info) : ConvolutionTiledExecutor(b, bias, biasSize) { auto outputCount = (int)biasSize; @@ -38,22 +115,40 @@ DenseConvolutionTiledExecutor::DenseConvolutionTiledExecutor(const Convolution2D auto core = static_cast(b)->functions(); int bytes = core->bytes; core->MNNGetMatMulPackMode(&eP, &lP, &hP); + bool useInt8Weight = 0 == originWeightSize; + if (useInt8Weight) { + MNN_ASSERT(nullptr != int8Info.get()); + originWeightSize = int8Info->weight.size(); + } // Don't use common->inputCount for old model common->inputCount is zero auto srcCount = (int)originWeightSize / outputCount / common->kernelX() / common->kernelY(); auto lSize = srcCount * common->kernelX() * common->kernelY(); - mResource->mWeight.reset(Tensor::createDevice( - {UP_DIV(outputCount, hP) * UP_DIV(lSize, lP) * hP * lP * bytes})); - std::shared_ptr cache(Tensor::createDevice({outputCount * srcCount * common->kernelX() * common->kernelY() * (int)sizeof(float)})); // cache must be float - - mValid = mValid && backend()->onAcquireBuffer(mResource->mWeight.get(), Backend::STATIC); - mValid = mValid && backend()->onAcquireBuffer(cache.get(), Backend::STATIC); - if (!mValid) { - return; + auto hU = UP_DIV(outputCount, hP); + auto lU = UP_DIV(lSize, lP); + if (useInt8Weight) { + // Quantize weight to int8 + auto allocSuccess = _initQuantizeResource(int8Info, mResource, hU, hP, lU, lP, outputCount, srcCount, common->kernelX() * common->kernelY()); + if (!allocSuccess) { + mValid = false; + return; + } + } else { + mResource->mWeight.reset(Tensor::createDevice( + {hU * lU * hP * lP * bytes})); + mValid = mValid && backend()->onAcquireBuffer(mResource->mWeight.get(), Backend::STATIC); + if (!mValid) { + return; + } + std::shared_ptr cache(Tensor::createDevice({outputCount * srcCount * common->kernelX() * common->kernelY() * (int)sizeof(float)})); // cache must be float + mValid = mValid && backend()->onAcquireBuffer(cache.get(), Backend::STATIC); + if (!mValid) { + return; + } + initWeight(mResource->mWeight->host(), originWeight, cache->host(), srcCount, outputCount, common->kernelX() * common->kernelY(), core); + // MNN_PRINT("srcCount:%d, outputCount:%d, dense weight matrix tile:", srcCount, outputCount); + // formatMatrix(mResource->mWeight->host(), {UP_DIV(outputCount, hP), lSize, hP}); + backend()->onReleaseBuffer(cache.get(), Backend::STATIC); } - initWeight(mResource->mWeight->host(), originWeight, cache->host(), srcCount, outputCount, common->kernelX() * common->kernelY(), core); - // MNN_PRINT("srcCount:%d, outputCount:%d, dense weight matrix tile:", srcCount, outputCount); - // formatMatrix(mResource->mWeight->host(), {UP_DIV(outputCount, hP), lSize, hP}); - backend()->onReleaseBuffer(cache.get(), Backend::STATIC); mProxy.reset(new DenseConvolutionTiledImpl(common, b)); } @@ -77,6 +172,121 @@ bool DenseConvolutionTiledExecutor::onClone(Backend* bn, const Op* op, Execution return true; } +ErrorCode DenseConvolutionTiledExecutor::onExecute(const std::vector &inputs, const std::vector &outputs) { + bool needDequantize = mResource->mDequantize.bits <= 8; + if (needDequantize) { +#ifndef MNN_ALLOC_MEMORY_INDIRECTLY + auto res = backend()->onAcquireBuffer(mWeightCache.weight.get(), Backend::STATIC); + if (!res) { + return OUT_OF_MEMORY; + } + if (nullptr != mWeightCache.weightInt8) { + res = backend()->onAcquireBuffer(mWeightCache.weightInt8.get(), Backend::STATIC); + if (!res) { + return OUT_OF_MEMORY; + } + } +#endif + auto hU = mResource->hU; + auto hP = mResource->hP; + auto mid = mResource->lU * mResource->lP; + auto srcInt8 = mResource->mWeight->host(); + if (mResource->mDequantize.bits == 4) { + int weightLength = hU * hP * mid; + weightLength = UP_DIV(weightLength, 2); + auto srcPtr = mResource->mWeight->host(); + auto dstPtr = mWeightCache.weightInt8->host(); + for (int i=0; imDequantize.mLowBitWeightMap[s0]; + s1 = mResource->mDequantize.mLowBitWeightMap[s1]; + dstPtr[2 * i + 0] = s0; + dstPtr[2 * i + 1] = s1; + } + srcInt8 = mWeightCache.weightInt8->host(); + } + auto alpha = mResource->mDequantize.mScaleBias->host(); + auto bias = mResource->mDequantize.mScaleBias->host() + hU * hP; + auto dstFloat = mWeightCache.weight->host(); + for (int yo=0; yoonReleaseBuffer(mWeightCache.weightInt8.get(), Backend::STATIC); + } +#endif + } + auto code = mProxy->onExecute(mInputs, outputs); +#ifndef MNN_ALLOC_MEMORY_INDIRECTLY + if (needDequantize) { + backend()->onReleaseBuffer(mWeightCache.weight.get(), Backend::STATIC); + } + ((Runtime*)(static_cast(backend())->getRuntime()))->onGabageCollect(0); +#endif + return code; +} +ErrorCode DenseConvolutionTiledExecutor::onResize(const std::vector &inputs, const std::vector &outputs) { + mInputs = {inputs[0], mResource->mWeight.get(), mResource->mBias.get()}; + bool needDequantize = mResource->mDequantize.bits <= 8; + if (needDequantize) { + if (mWeightCache.weight == nullptr) { + int weightLength = mResource->hU * mResource->lU * mResource->hP * mResource->lP; + mWeightCache.weight.reset(new Tensor); + mWeightCache.weight->buffer().type = halide_type_of(); + TensorUtils::getDescribe(mWeightCache.weight.get())->dimensionFormat = MNN_DATA_FORMAT_NCHW; + mWeightCache.weight->buffer().dimensions = 1; + mWeightCache.weight->setLength(0, weightLength); + if (mWeightCache.weightInt8 == nullptr && mResource->mDequantize.bits == 4) { + mWeightCache.weightInt8.reset(new Tensor); + mWeightCache.weightInt8->buffer().type = halide_type_of(); + mWeightCache.weightInt8->buffer().dimensions = 1; + mWeightCache.weightInt8->setLength(0, weightLength); + TensorUtils::getDescribe(mWeightCache.weightInt8.get())->dimensionFormat = MNN_DATA_FORMAT_NCHW; + } + } + mInputs[1] = mWeightCache.weight.get(); +#ifdef MNN_ALLOC_MEMORY_INDIRECTLY + bool res = false; + if (nullptr != mWeightCache.weightInt8) { + res = backend()->onAcquireBuffer(mWeightCache.weightInt8.get(), Backend::DYNAMIC); + if (!res) { + return OUT_OF_MEMORY; + } + } + res = backend()->onAcquireBuffer(mWeightCache.weight.get(), Backend::DYNAMIC); + if (!res) { + return OUT_OF_MEMORY; + } + if (nullptr != mWeightCache.weightInt8) { + backend()->onReleaseBuffer(mWeightCache.weightInt8.get(), Backend::DYNAMIC); + } +#endif + } + auto code = mProxy->onResize(mInputs, outputs); + if (NO_ERROR != code) { + return code; + } + if (needDequantize) { +#ifdef MNN_ALLOC_MEMORY_INDIRECTLY + backend()->onReleaseBuffer(mWeightCache.weight.get(), Backend::DYNAMIC); +#endif + } + return NO_ERROR; +} + ErrorCode ConvolutionTiledExecutorMultiInput::onExecute(const std::vector& inputs, const std::vector& outputs) { int depth = inputs[1]->channel(); diff --git a/source/backend/cpu/compute/DenseConvolutionTiledExecutor.hpp b/source/backend/cpu/compute/DenseConvolutionTiledExecutor.hpp index 33e974e0..21adcd7c 100644 --- a/source/backend/cpu/compute/DenseConvolutionTiledExecutor.hpp +++ b/source/backend/cpu/compute/DenseConvolutionTiledExecutor.hpp @@ -34,25 +34,25 @@ protected: class DenseConvolutionTiledExecutor : public ConvolutionTiledExecutor { public: DenseConvolutionTiledExecutor(const Convolution2DCommon *common, Backend *b, const float *originWeight, - size_t originWeightSize, const float *bias, size_t biasSize); + size_t originWeightSize, const float *bias, size_t biasSize, std::shared_ptr); DenseConvolutionTiledExecutor(std::shared_ptr res, const Convolution2DCommon *common, Backend* b); virtual ~DenseConvolutionTiledExecutor(); - virtual ErrorCode onExecute(const std::vector &inputs, const std::vector &outputs) override { - return mProxy->onExecute(inputs, outputs); - } - virtual ErrorCode onResize(const std::vector &inputs, const std::vector &outputs) override { - mInputs = {inputs[0], mResource->mWeight.get(), mResource->mBias.get()}; - return mProxy->onResize(mInputs, outputs); - } + virtual ErrorCode onExecute(const std::vector &inputs, const std::vector &outputs) override; + virtual ErrorCode onResize(const std::vector &inputs, const std::vector &outputs) override; virtual bool onClone(Backend* bn, const Op* op, Execution** dst) override; void initWeight(float *dest, const float *source, float* cache, int depth, int outputCount, int kernelSize, const CoreFunctions* function); static PerfConfig bestTileConvolutionConfig(const Convolution2DCommon *common, const Tensor *inputTensor, const Tensor *outputTensor, int threadNumber, Backend* b) { return DenseConvolutionTiledImpl::bestTileConvolutionConfig(common, inputTensor, outputTensor, threadNumber, b); } + struct DequantizeCache { + std::shared_ptr weight; + std::shared_ptr weightInt8; + }; protected: + DequantizeCache mWeightCache; std::shared_ptr mProxy; }; diff --git a/source/backend/cpu/compute/Int8FunctionsOpt.cpp b/source/backend/cpu/compute/Int8FunctionsOpt.cpp index 0401c7a8..a2dc45ab 100644 --- a/source/backend/cpu/compute/Int8FunctionsOpt.cpp +++ b/source/backend/cpu/compute/Int8FunctionsOpt.cpp @@ -1577,130 +1577,255 @@ void MNNMaxPoolInt8(int8_t* dst, int8_t* src, size_t outputWidth, size_t inputWi void MNNBinaryAddInt8(int8_t* outputRaw, const int8_t* inputRaw0, const int8_t* inputRaw1, const float* inputScale0, const float* inputScale1, const float* outputScale, int elementSize, int needBroadcast) { float sum = 0; #ifdef MNN_USE_SSE - const uint8_t zeroPoint = 128; + const int zeroPoint = 128; + const int maxValue = 255; + const int minValue = 0; + const uint8_t* inputData0 = (uint8_t*)inputRaw0; + const uint8_t* inputData1 = (uint8_t*)inputRaw1; + uint8_t* outputData = (uint8_t*)outputRaw; #else - const uint8_t zeroPoint = 0; + const int zeroPoint = 0; + const int maxValue = 127; + const int minValue = -128; + const int8_t* inputData0 = inputRaw0; + const int8_t* inputData1 = inputRaw1; + int8_t* outputData = outputRaw; #endif for (int i = 0; i < elementSize; ++i) { if (needBroadcast == 0) { - sum = static_cast((int8_t)(inputRaw0[0] - zeroPoint)) * inputScale0[i] + static_cast((int8_t)(inputRaw1[i] - zeroPoint)) * inputScale1[i]; + float inp0 = (inputData0[0] - zeroPoint) * inputScale0[i]; + float inp1 = (inputData1[i] - zeroPoint) * inputScale1[i]; + sum = inp0 + inp1; } else if (needBroadcast == 1) { - sum = static_cast((int8_t)(inputRaw0[i] - zeroPoint)) * inputScale0[i] + static_cast((int8_t)(inputRaw1[0] - zeroPoint)) * inputScale1[i]; + float inp0 = (inputData0[i] - zeroPoint) * inputScale0[i]; + float inp1 = (inputData1[0] - zeroPoint) * inputScale1[i]; + sum = inp0 + inp1; } else { - sum = static_cast((int8_t)(inputRaw0[i] - zeroPoint)) * inputScale0[i] + static_cast((int8_t)(inputRaw1[i] - zeroPoint)) * inputScale1[i]; + float inp0 = (inputData0[i] - zeroPoint) * inputScale0[i]; + float inp1 = (inputData1[i] - zeroPoint) * inputScale1[i]; + sum = inp0 + inp1; } - float value = sum * outputScale[i]; - outputRaw[i] = static_cast(std::max(std::min(value, 127.0f), -127.0f)) + zeroPoint; + int value = (int)roundf(sum * outputScale[i]) + zeroPoint; + if (value > maxValue) { + value = maxValue; + } + if (value < minValue) { + value = minValue; + } + outputData[i] = value; } } void MNNBinarySubInt8(int8_t* outputRaw, const int8_t* inputRaw0, const int8_t* inputRaw1, const float* inputScale0, const float* inputScale1, const float* outputScale, int elementSize, int needBroadcast) { float res = 0; #ifdef MNN_USE_SSE - const uint8_t zeroPoint = 128; + const int zeroPoint = 128; + const int maxValue = 255; + const int minValue = 0; + const uint8_t* inputData0 = (uint8_t*)inputRaw0; + const uint8_t* inputData1 = (uint8_t*)inputRaw1; + uint8_t* outputData = (uint8_t*)outputRaw; #else - const uint8_t zeroPoint = 0; + const int zeroPoint = 0; + const int maxValue = 127; + const int minValue = -128; + const int8_t* inputData0 = inputRaw0; + const int8_t* inputData1 = inputRaw1; + int8_t* outputData = outputRaw; #endif for (int i = 0; i < elementSize; ++i) { if (needBroadcast == 0) { - res = static_cast((int8_t)(inputRaw0[0] - zeroPoint)) * inputScale0[i] - static_cast((int8_t)(inputRaw1[i] - zeroPoint)) * inputScale1[i]; + float inp0 = (inputData0[0] - zeroPoint) * inputScale0[i]; + float inp1 = (inputData1[i] - zeroPoint) * inputScale1[i]; + res = inp0 - inp1; } else if (needBroadcast == 1) { - res = static_cast((int8_t)(inputRaw0[i] - zeroPoint)) * inputScale0[i] - static_cast((int8_t)(inputRaw1[0] - zeroPoint)) * inputScale1[i]; + float inp0 = (inputData0[i] - zeroPoint) * inputScale0[i]; + float inp1 = (inputData1[0] - zeroPoint) * inputScale1[i]; + res = inp0 - inp1; } else { - res = static_cast((int8_t)(inputRaw0[i] - zeroPoint)) * inputScale0[i] - static_cast((int8_t)(inputRaw1[i] - zeroPoint)) * inputScale1[i]; + float inp0 = (inputData0[i] - zeroPoint) * inputScale0[i]; + float inp1 = (inputData1[i] - zeroPoint) * inputScale1[i]; + res = inp0 - inp1; } - float value = res * outputScale[i]; - outputRaw[i] = static_cast(std::max(std::min(value, 127.0f), -127.0f)) + zeroPoint; + int value = (int)roundf(res * outputScale[i]) + zeroPoint; + if (value > maxValue) { + value = maxValue; + } + if (value < minValue) { + value = minValue; + } + outputData[i] = value; } } void MNNBinaryMulInt8(int8_t* outputRaw, const int8_t* inputRaw0, const int8_t* inputRaw1, const float* inputScale0, const float* inputScale1, const float* outputScale, int elementSize, int needBroadcast) { float res = 0; #ifdef MNN_USE_SSE - const uint8_t zeroPoint = 128; + const int zeroPoint = 128; + const int maxValue = 255; + const int minValue = 0; + const uint8_t* inputData0 = (uint8_t*)inputRaw0; + const uint8_t* inputData1 = (uint8_t*)inputRaw1; + uint8_t* outputData = (uint8_t*)outputRaw; #else - const uint8_t zeroPoint = 0; + const int zeroPoint = 0; + const int maxValue = 127; + const int minValue = -128; + const int8_t* inputData0 = inputRaw0; + const int8_t* inputData1 = inputRaw1; + int8_t* outputData = outputRaw; #endif for (int i = 0; i < elementSize; ++i) { if (needBroadcast == 0) { - res = static_cast((int8_t)(inputRaw0[0] - zeroPoint)) * inputScale0[i] * static_cast((int8_t)(inputRaw1[i] - zeroPoint)) * inputScale1[i]; + float inp0 = (inputData0[0] - zeroPoint) * inputScale0[i]; + float inp1 = (inputData1[i] - zeroPoint) * inputScale1[i]; + res = inp0 * inp1; } else if (needBroadcast == 1) { - res = static_cast((int8_t)(inputRaw0[i] - zeroPoint)) * inputScale0[i] * static_cast((int8_t)(inputRaw1[0] - zeroPoint)) * inputScale1[i]; + float inp0 = (inputData0[i] - zeroPoint) * inputScale0[i]; + float inp1 = (inputData1[0] - zeroPoint) * inputScale1[i]; + res = inp0 * inp1; } else { - res = static_cast((int8_t)(inputRaw0[i] - zeroPoint)) * inputScale0[i] * static_cast((int8_t)(inputRaw1[i] - zeroPoint)) * inputScale1[i]; + float inp0 = (inputData0[i] - zeroPoint) * inputScale0[i]; + float inp1 = (inputData1[i] - zeroPoint) * inputScale1[i]; + res = inp0 * inp1; } - float value = res * outputScale[i]; - outputRaw[i] = static_cast(std::max(std::min(value, 127.0f), -127.0f)) + zeroPoint; + int value = (int)roundf(res * outputScale[i]) + zeroPoint; + if (value > maxValue) { + value = maxValue; + } + if (value < minValue) { + value = minValue; + } + outputData[i] = value; } } void MNNBinaryMinInt8(int8_t* outputRaw, const int8_t* inputRaw0, const int8_t* inputRaw1, const float* inputScale0, const float* inputScale1, const float* outputScale, int elementSize, int needBroadcast) { float res = 0; #ifdef MNN_USE_SSE - const uint8_t zeroPoint = 128; + const int zeroPoint = 128; + const int maxValue = 255; + const int minValue = 0; + const uint8_t* inputData0 = (uint8_t*)inputRaw0; + const uint8_t* inputData1 = (uint8_t*)inputRaw1; + uint8_t* outputData = (uint8_t*)outputRaw; #else - const uint8_t zeroPoint = 0; + const int zeroPoint = 0; + const int maxValue = 127; + const int minValue = -128; + const int8_t* inputData0 = inputRaw0; + const int8_t* inputData1 = inputRaw1; + int8_t* outputData = outputRaw; #endif for (int i = 0; i < elementSize; ++i) { if (needBroadcast == 0) { - res = std::min(static_cast((int8_t)(inputRaw0[0] - zeroPoint)) * inputScale0[i], static_cast((int8_t)(inputRaw1[i] - zeroPoint)) * inputScale1[i]); + float inp0 = (inputData0[0] - zeroPoint) * inputScale0[i]; + float inp1 = (inputData1[i] - zeroPoint) * inputScale1[i]; + res = std::min(inp0, inp1); } else if (needBroadcast == 1) { - res = std::min(static_cast((int8_t)(inputRaw0[i] - zeroPoint)) * inputScale0[i], static_cast((int8_t)(inputRaw1[0] - zeroPoint)) * inputScale1[i]); + float inp0 = (inputData0[i] - zeroPoint) * inputScale0[i]; + float inp1 = (inputData1[0] - zeroPoint) * inputScale1[i]; + res = std::min(inp0, inp1); } else { - res = std::min(static_cast((int8_t)(inputRaw0[i] - zeroPoint)) * inputScale0[i], static_cast((int8_t)(inputRaw1[i] - zeroPoint)) * inputScale1[i]); + float inp0 = (inputData0[i] - zeroPoint) * inputScale0[i]; + float inp1 = (inputData1[i] - zeroPoint) * inputScale1[i]; + res = std::min(inp0, inp1); } - float value = res * outputScale[i]; - outputRaw[i] = static_cast(std::max(std::min(value, 127.0f), -127.0f)) + zeroPoint; + int value = (int)roundf(res * outputScale[i]) + zeroPoint; + if (value > maxValue) { + value = maxValue; + } + if (value < minValue) { + value = minValue; + } + outputData[i] = value; } } void MNNBinaryMaxInt8(int8_t* outputRaw, const int8_t* inputRaw0, const int8_t* inputRaw1, const float* inputScale0, const float* inputScale1, const float* outputScale, int elementSize, int needBroadcast) { float res = 0; #ifdef MNN_USE_SSE - const uint8_t zeroPoint = 128; + const int zeroPoint = 128; + const int maxValue = 255; + const int minValue = 0; + const uint8_t* inputData0 = (uint8_t*)inputRaw0; + const uint8_t* inputData1 = (uint8_t*)inputRaw1; + uint8_t* outputData = (uint8_t*)outputRaw; #else - const uint8_t zeroPoint = 0; + const int zeroPoint = 0; + const int maxValue = 127; + const int minValue = -128; + const int8_t* inputData0 = inputRaw0; + const int8_t* inputData1 = inputRaw1; + int8_t* outputData = outputRaw; #endif for (int i = 0; i < elementSize; ++i) { if (needBroadcast == 0) { - res = std::max(static_cast((int8_t)(inputRaw0[0] - zeroPoint)) * inputScale0[i], static_cast((int8_t)(inputRaw1[i] - zeroPoint)) * inputScale1[i]); + float inp0 = (inputData0[0] - zeroPoint) * inputScale0[i]; + float inp1 = (inputData1[i] - zeroPoint) * inputScale1[i]; + res = std::max(inp0, inp1); } else if (needBroadcast == 1) { - res = std::max(static_cast((int8_t)(inputRaw0[i] - zeroPoint)) * inputScale0[i], static_cast((int8_t)(inputRaw1[0] - zeroPoint)) * inputScale1[i]); + float inp0 = (inputData0[i] - zeroPoint) * inputScale0[i]; + float inp1 = (inputData1[0] - zeroPoint) * inputScale1[i]; + res = std::max(inp0, inp1); } else { - res = std::max(static_cast((int8_t)(inputRaw0[i] - zeroPoint)) * inputScale0[i], static_cast((int8_t)(inputRaw1[i] - zeroPoint)) * inputScale1[i]); + float inp0 = (inputData0[i] - zeroPoint) * inputScale0[i]; + float inp1 = (inputData1[i] - zeroPoint) * inputScale1[i]; + res = std::max(inp0, inp1); } - float value = res * outputScale[i]; - outputRaw[i] = static_cast(std::max(std::min(value, 127.0f), -127.0f)) + zeroPoint; + int value = (int)roundf(res * outputScale[i]) + zeroPoint; + if (value > maxValue) { + value = maxValue; + } + if (value < minValue) { + value = minValue; + } + outputData[i] = value; } } void MNNBinarySqdInt8(int8_t* outputRaw, const int8_t* inputRaw0, const int8_t* inputRaw1, const float* inputScale0, const float* inputScale1, const float* outputScale, int elementSize, int needBroadcast) { - float res = 0, inp0 = 0, inp1 = 0; + float res = 0; #ifdef MNN_USE_SSE - const uint8_t zeroPoint = 128; + const int zeroPoint = 128; + const int maxValue = 255; + const int minValue = 0; + const uint8_t* inputData0 = (uint8_t*)inputRaw0; + const uint8_t* inputData1 = (uint8_t*)inputRaw1; + uint8_t* outputData = (uint8_t*)outputRaw; #else - const uint8_t zeroPoint = 0; + const int zeroPoint = 0; + const int maxValue = 127; + const int minValue = -128; + const int8_t* inputData0 = inputRaw0; + const int8_t* inputData1 = inputRaw1; + int8_t* outputData = outputRaw; #endif for (int i = 0; i < elementSize; ++i) { if (needBroadcast == 0) { - inp0 = static_cast((int8_t)(inputRaw0[0] - zeroPoint)) * inputScale0[i]; - inp1 = static_cast((int8_t)(inputRaw1[i] - zeroPoint)) * inputScale1[i]; + float inp0 = (inputData0[0] - zeroPoint) * inputScale0[i]; + float inp1 = (inputData1[i] - zeroPoint) * inputScale1[i]; res = (inp0 - inp1) * (inp0 - inp1); } else if (needBroadcast == 1) { - inp0 = static_cast((int8_t)(inputRaw0[i] - zeroPoint)) * inputScale0[i]; - inp1 = static_cast((int8_t)(inputRaw1[0] - zeroPoint)) * inputScale1[i]; + float inp0 = (inputData0[i] - zeroPoint) * inputScale0[i]; + float inp1 = (inputData1[0] - zeroPoint) * inputScale1[i]; res = (inp0 - inp1) * (inp0 - inp1); } else { - inp0 = static_cast((int8_t)(inputRaw0[i] - zeroPoint)) * inputScale0[i]; - inp1 = static_cast((int8_t)(inputRaw1[i] - zeroPoint)) * inputScale1[i]; + float inp0 = (inputData0[i] - zeroPoint) * inputScale0[i]; + float inp1 = (inputData1[i] - zeroPoint) * inputScale1[i]; res = (inp0 - inp1) * (inp0 - inp1); } - float value = res * outputScale[i]; - outputRaw[i] = static_cast(std::max(std::min(value, 127.0f), -127.0f)) + zeroPoint; + int value = (int)roundf(res * outputScale[i]) + zeroPoint; + if (value > maxValue) { + value = maxValue; + } + if (value < minValue) { + value = minValue; + } + outputData[i] = value; } } - #endif // #ifndef MNN_USE_NEON #ifndef MNN_USE_SSE diff --git a/source/backend/cpu/compute/SparseConvolutionTiledExecutor.cpp b/source/backend/cpu/compute/SparseConvolutionTiledExecutor.cpp index bae3812d..a2b66630 100644 --- a/source/backend/cpu/compute/SparseConvolutionTiledExecutor.cpp +++ b/source/backend/cpu/compute/SparseConvolutionTiledExecutor.cpp @@ -17,16 +17,155 @@ #include "math/Vec.hpp" #include "core/BufferAllocator.hpp" #include "common/MemoryFormater.h" +#include "common/CommonCompute.hpp" using Vec4 = MNN::Math::Vec; namespace MNN { +/* + source: source matrix is h x l + transpose: if false, export compressed matrix as h x l, other export as l x h. + */ + +static int _fillIndex(int32_t* targetIndexes, uint32_t begin, uint32_t end, const uint32_t* indexes, uint32_t indexSize, int indexStart) { + int mid = -1; + int current = -1; + for (int i=indexStart; i= begin) { + mid = i; + current = indexes[i]; + break; + } + } + uint32_t number = end - begin; + for (uint32_t i=0; i= end) { + break; + } + targetIndexes[current - begin] = mid; + mid++; + if (mid >= indexSize) { + break; + } + current = indexes[mid]; + } while (true); + return mid; +} + +static void MNNGetOptimalBlockShape(size_t& weightNNZElement, size_t& weightBlockNumber, const uint32_t* indexes, uint32_t indexSize, int sparseBlockOC, size_t h, size_t l) { + size_t nnzBlock = 0; + size_t nnzTail = 0; + int ocEven = (h / sparseBlockOC) * sparseBlockOC; + std::vector tempIndexes(sparseBlockOC * l); + size_t ioc = 0; + int offset = 0; + for (; ioc < ocEven; ioc += sparseBlockOC) { + offset = _fillIndex(tempIndexes.data(), ioc * l, (ioc+sparseBlockOC) * l, indexes, indexSize, offset); + for (size_t i = 0; i < l; i++) { + bool allZero = true; + for (int u=0; u= 0) { + allZero = false; + break; + } + } + if (!allZero) { + nnzBlock++; + } + } + } + for (; ioc < h; ioc++) { + offset = _fillIndex(tempIndexes.data(), ioc * l, (ioc+1) * l, indexes, indexSize, offset); + for (size_t i = 0; i < l; i++) { + if (tempIndexes[i] >= 0) { + nnzTail++; + } + } + } + weightNNZElement = nnzBlock * sparseBlockOC + nnzTail; + weightBlockNumber = nnzBlock + nnzTail; + return; +} +static void MNNPackForSparseMatMul_B(float* dest, unsigned int* NNZMap, int* dataOffsetMap, int sparseBlockOC, const float* source, const uint32_t* indexes, uint32_t indexSize, size_t h, size_t ic, size_t kernelSize, const int eP) { + // 1. in convolution, source B layout is OC x (KH * KW * IC), + // the dest layout of weight is BCSC(block compressed sparse colum) format, which is OC(!=0) x (KH*KW*IC!=0), as a canceled result, just do BCSR, transpose should be false. + // 2. in ordinary sparse MatMul, transpose is corresponding to BCSR or BCSC + auto l = ic * kernelSize; + + int columOffset = 0; + int i = 0; + std::vector tempIndexes(sparseBlockOC * l); + int offset = 0; + for (; i + sparseBlockOC <= h; i += sparseBlockOC) { + *NNZMap = 0; + offset = _fillIndex(tempIndexes.data(), i * l, (i+sparseBlockOC) * l, indexes, indexSize, offset); + // Origin weight is oc, ic, kernelSize, new weight order is oc, kernelsize, ic + for (int x=0; x= 0) { + allZero = false; + break; + } + } + if (!allZero) { + for (int ioc = 0; ioc < sparseBlockOC; ioc++) { + auto index = tempIndexes[ioc*l + j]; + if (index >= 0) { + *dest = source[index]; + } else { + *dest = 0.0f; + } + dest++; + } + *NNZMap = *NNZMap + 1; + *dataOffsetMap = columOffset; + dataOffsetMap++; + columOffset = 0; + } + columOffset += eP; + } + } + NNZMap++; + columOffset -= l * eP; + } + + for (; i < h; i++) { + *NNZMap = 0; + offset = _fillIndex(tempIndexes.data(), i * l, (i+1) * l, indexes, indexSize, offset); + for (int x=0; x= 0) { + *dest = source[index]; + dest++; + *NNZMap = *NNZMap + 1; + *dataOffsetMap = columOffset; + dataOffsetMap++; + columOffset = 0; + } + columOffset += eP; + } + } + NNZMap++; + columOffset -= l * eP; + } + + *dataOffsetMap = columOffset; // + return; +} void SparseConvolutionTiledExecutor::initWeight(float* dest, unsigned int* NNZMap, int* dataOffsetMap, - int sparseBlockOC, const float* source, float* cache, int depth, + int sparseBlockOC, const float* source, const uint32_t* indexes, uint32_t indexSize, int depth, int outputCount, int kernelSize, int eP, size_t weightNNZElement, size_t weightBlockNumber, const CoreFunctions* function) { - ConvolutionTiledExecutor::initWeight(source, cache, depth, outputCount, kernelSize, function); - function->MNNPackForSparseMatMul_B(dest, NNZMap, dataOffsetMap, sparseBlockOC, cache, outputCount, kernelSize * depth, eP, false); + MNNPackForSparseMatMul_B(dest, NNZMap, dataOffsetMap, sparseBlockOC, source, indexes, indexSize, outputCount, depth, kernelSize, eP); // MNN_PRINT("\nBCSR origin weight:"); // formatMatrix(source, {outputCount, kernelSize * depth}); @@ -40,13 +179,13 @@ void SparseConvolutionTiledExecutor::initWeight(float* dest, unsigned int* NNZMa SparseConvolutionTiledExecutor::SparseConvolutionTiledExecutor(const Convolution2DCommon *common, Backend* b, - const float* originWeight, size_t originWeightSize, const SparseCommon* sparseCommon, + const IDSTQuan* weight, const SparseCommon* sparseCommon, const float* bias, size_t biasSize) : ConvolutionTiledExecutor(b, bias, biasSize) { auto outputCount = (int)biasSize; // Don't use common->inputCount for old model common->inputCount is zero - auto lSize = originWeightSize / outputCount; + auto lSize = weight->weightSize() / outputCount; auto srcCount = lSize / (common->kernelX() * common->kernelY()); int eP, lP, hP; @@ -64,7 +203,7 @@ SparseConvolutionTiledExecutor::SparseConvolutionTiledExecutor(const Convolution if (optimalSparseBlockOC != sparseBlockOC) { size_t optimalWeightNNZElement = weightNNZElement; size_t optimalWeightBlockNumber = weightBlockNumber; - core->MNNGetOptimalBlockShape(optimalWeightNNZElement, optimalWeightBlockNumber, originWeight, optimalSparseBlockOC, outputCount, lSize); + MNNGetOptimalBlockShape(optimalWeightNNZElement, optimalWeightBlockNumber, weight->index()->data(), weight->index()->size(), optimalSparseBlockOC, outputCount, lSize); MNN_ASSERT(sparseBlockOC == 1 || sparseBlockOC == 2 || sparseBlockOC == 4 || sparseBlockOC == 8); // MNN_PRINT("caution: sparsity changed!!!\nsparseBlockOC:%d -> %d weightNNZElement:%zu -> %zu, weightBlockNumber:%zu -> %zu, outputCount:%d, divide:%d, tail:%d\n", // sparseBlockOC, optimalSparseBlockOC, weightNNZElement, optimalWeightNNZElement, weightBlockNumber, optimalWeightBlockNumber, outputCount, outputCount / optimalSparseBlockOC, outputCount % optimalSparseBlockOC); @@ -72,26 +211,25 @@ SparseConvolutionTiledExecutor::SparseConvolutionTiledExecutor(const Convolution weightNNZElement = optimalWeightNNZElement; weightBlockNumber = optimalWeightBlockNumber; } + MNN_ASSERT(weightNNZElement > 0); + MNN_ASSERT(weightBlockNumber > 0); mSparseIndexData.reset(new SparseIndexData(sparseBlockOC, weightNNZElement, weightBlockNumber, backend())); mResource->mWeight.reset(Tensor::createDevice( { static_cast(weightNNZElement + 1) * bytes })); // one more element in case of weight are all zeros - std::shared_ptr cache(Tensor::createDevice({static_cast(outputCount * lSize * sizeof(float))})); // cache must be float mSparseIndexData->mNNZMap.reset(Tensor::createDevice({outputCount / sparseBlockOC + outputCount % sparseBlockOC})); mSparseIndexData->mDataOffsetMap.reset(Tensor::createDevice({static_cast(weightBlockNumber + 1)})); mValid = backend()->onAcquireBuffer(mResource->mWeight.get(), Backend::STATIC); - mValid = mValid && backend()->onAcquireBuffer(cache.get(), Backend::STATIC); mValid = mValid && backend()->onAcquireBuffer(mSparseIndexData->mNNZMap.get(), Backend::STATIC); mValid = mValid && backend()->onAcquireBuffer(mSparseIndexData->mDataOffsetMap.get(), Backend::STATIC); if (!mValid) { return; } - initWeight(mResource->mWeight->host(), mSparseIndexData->mNNZMap->host(), mSparseIndexData->mDataOffsetMap->host(), sparseBlockOC, originWeight, cache->host(), srcCount, outputCount, common->kernelX() * common->kernelY(), eP, weightNNZElement, weightBlockNumber, core); - backend()->onReleaseBuffer(cache.get(), Backend::STATIC); + initWeight(mResource->mWeight->host(), mSparseIndexData->mNNZMap->host(), mSparseIndexData->mDataOffsetMap->host(), sparseBlockOC, weight->alpha()->data(), weight->index()->data(), weight->index()->size(), srcCount, outputCount, common->kernelX() * common->kernelY(), eP, weightNNZElement, weightBlockNumber, core); mProxy.reset(new SparseConvolutionTiledImpl(common, packedSparseMatmul, sparseBlockOC, b)); } diff --git a/source/backend/cpu/compute/SparseConvolutionTiledExecutor.hpp b/source/backend/cpu/compute/SparseConvolutionTiledExecutor.hpp index 7409821a..806d1339 100644 --- a/source/backend/cpu/compute/SparseConvolutionTiledExecutor.hpp +++ b/source/backend/cpu/compute/SparseConvolutionTiledExecutor.hpp @@ -67,8 +67,7 @@ public: class SparseConvolutionTiledExecutor : public ConvolutionTiledExecutor { public: - SparseConvolutionTiledExecutor(const Convolution2DCommon *common, Backend *b, const float *originWeight, - size_t originWeightSize, const SparseCommon* sparseCommon, const float *bias, size_t biasSize); + SparseConvolutionTiledExecutor(const Convolution2DCommon *common, Backend *b, const IDSTQuan* weight, const SparseCommon* sparseCommon, const float *bias, size_t biasSize); SparseConvolutionTiledExecutor(std::shared_ptr res, std::shared_ptr mSparseIndexData, const Convolution2DCommon *common, MNNPackedSparseMatMul packedSparseMatmul, int sparseBlockOC, Backend *b); @@ -84,24 +83,9 @@ public: virtual bool onClone(Backend *bn, const Op *op, Execution **dst) override; void initWeight(float *dest, unsigned int *NNZMap, int *dataOffsetMap, int sparseBlockOC, const float *source, - float *cache, int depth, int outputCount, int kernelSize, int eP, size_t weightNNZElement, + const uint32_t* indexes, uint32_t indexSize, int depth, int outputCount, int kernelSize, int eP, size_t weightNNZElement, size_t weightBlockNumber, const CoreFunctions *function); - static bool shouldUseSparseConvolution(size_t originWeightSize, const SparseCommon* sparseCommon) { - auto sparseBlockOC = sparseCommon->args()->LookupByKey("sparseBlockOC")->i(); - size_t weightNNZElement = sparseCommon->args()->LookupByKey("NNZElement")->i(); - return shouldUseSparseConvolution((originWeightSize - weightNNZElement) / ((double)originWeightSize), sparseBlockOC); - } - static bool inline shouldUseSparseConvolution(float sparsity, int sparseBlockOC) { - std::vector thresholds = getSparsityThreshold(); - return sparsity > thresholds[std::min(std::max(sparseBlockOC, 0), (int)thresholds.size() - 1)]; - } - static inline std::vector getSparsityThreshold() { - - // sparsity threadhold values, when sparseblock is - // {0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16} - return {1.f, 0.6f, 0.5f, 0.4f, 0.3f, 0.3f, 0.3f, 0.3f, 0.3f, 0.3f, 0.3f, 0.3f, 0.3f, 0.3f, 0.3f, 0.3f, 0.3f}; - } protected: std::shared_ptr mProxy; std::shared_ptr mSparseIndexData; @@ -110,4 +94,4 @@ protected: #undef RELEASE_BUFFER_HINT } // namespace MNN -#endif /* SparseConvolutionTiledExecutor_hpp */ \ No newline at end of file +#endif /* SparseConvolutionTiledExecutor_hpp */ diff --git a/source/backend/cuda/CMakeLists.txt b/source/backend/cuda/CMakeLists.txt index 99e4a1db..9f648ad1 100644 --- a/source/backend/cuda/CMakeLists.txt +++ b/source/backend/cuda/CMakeLists.txt @@ -1,4 +1,4 @@ -set(CUDA_MIN_VERSION "7.0") +set(CUDA_MIN_VERSION "8.0") find_package(CUDA ${CUDA_MIN_VERSION}) set (EXTRA_LIBS "") @@ -21,6 +21,16 @@ if(CUDA_FOUND) include(${CMAKE_CURRENT_SOURCE_DIR}/SelectCudaComputeArch.cmake) CUDA_SELECT_NVCC_ARCH_FLAGS(CUDA_ARCH_FLAGS ${CUDA_ARCHS}) + list(LENGTH CUDA_ARCH_FLAGS_readable_code arch_count) + # Current Supported Arch List + IF (${arch_count} EQUAL 1) + set(support_archs 60 61 62 70 72 75 80 86) + list(FIND support_archs ${CUDA_ARCH_FLAGS_readable_code} list_index) + IF (${list_index} EQUAL -1) + message(FATAL_ERROR "Please add your own sm arch ${CUDA_ARCH_FLAGS_readable_code} to CmakeLists.txt!") + ENDIF() + ENDIF() + IF ((CUDA_VERSION VERSION_GREATER "8.0") OR (CUDA_VERSION VERSION_EQUAL "8.0")) set(CUDA_NVCC_FLAGS "${CUDA_NVCC_FLAGS} -gencode arch=compute_60,code=sm_60") set(CUDA_NVCC_FLAGS "${CUDA_NVCC_FLAGS} -gencode arch=compute_61,code=sm_61") @@ -41,6 +51,27 @@ if(CUDA_FOUND) set(CUDA_NVCC_FLAGS "${CUDA_NVCC_FLAGS} -gencode arch=compute_86,code=sm_86") ENDIF() + # Limit minimum cuda version for each archs + IF (${arch_count} EQUAL 1) + IF ((CUDA_ARCH_FLAGS_readable_code VERSION_GREATER "80") OR (CUDA_ARCH_FLAGS_readable_code VERSION_EQUAL "80")) + IF (CUDA_VERSION VERSION_LESS "11.2") + message(FATAL_ERROR "Please update cuda version to 11.2 or higher!") + ENDIF() + ENDIF() + + IF ((CUDA_ARCH_FLAGS_readable_code VERSION_GREATER "75") OR (CUDA_ARCH_FLAGS_readable_code VERSION_EQUAL "75")) + IF (CUDA_VERSION VERSION_LESS "10.2") + message(FATAL_ERROR "Please update cuda version to 10.2 or higher!") + ENDIF() + ENDIF() + + IF ((CUDA_ARCH_FLAGS_readable_code VERSION_GREATER "70") OR (CUDA_ARCH_FLAGS_readable_code VERSION_EQUAL "70")) + IF (CUDA_VERSION VERSION_LESS "10.1") + message(FATAL_ERROR "Please update cuda version to 10.1 or higher!") + ENDIF() + ENDIF() + ENDIF() + message(STATUS "Enabling CUDA support (version: ${CUDA_VERSION_STRING}," " archs: ${CUDA_ARCH_FLAGS_readable})") else() diff --git a/source/backend/cuda/SelectCudaComputeArch.cmake b/source/backend/cuda/SelectCudaComputeArch.cmake index d4b68a51..889e7af8 100755 --- a/source/backend/cuda/SelectCudaComputeArch.cmake +++ b/source/backend/cuda/SelectCudaComputeArch.cmake @@ -36,9 +36,9 @@ # - "Auto" detects local machine GPU compute arch at runtime. # - "Common" and "All" cover common and entire subsets of architectures # ARCH_AND_PTX : NAME | NUM.NUM | NUM.NUM(NUM.NUM) | NUM.NUM+PTX -# NAME: Fermi Kepler Maxwell Kepler+Tegra Kepler+Tesla Maxwell+Tegra Pascal Volta Turing +# NAME: Kepler Maxwell Kepler+Tegra Kepler+Tesla Maxwell+Tegra Pascal Volta Turing Ampere # NUM: Any number. Only those pairs are currently accepted by NVCC though: -# 2.0 2.1 3.0 3.2 3.5 3.7 5.0 5.2 5.3 6.0 6.2 7.0 7.2 7.5 +# 3.5 3.7 5.0 5.2 5.3 6.0 6.2 7.0 7.2 7.5 8.0 # Returns LIST of flags to be added to CUDA_NVCC_FLAGS in ${out_variable} # Additionally, sets ${out_variable}_readable to the resulting numeric list # Example: @@ -58,39 +58,19 @@ endif() # See: https://docs.nvidia.com/cuda/cuda-compiler-driver-nvcc/index.html#gpu-feature-list # This list will be used for CUDA_ARCH_NAME = All option -set(CUDA_KNOWN_GPU_ARCHITECTURES "") - -# CUDA 9.X and later do not support the Fermi architecture anymore. -if(CUDA_VERSION VERSION_LESS "9.0") - list(APPEND CUDA_KNOWN_GPU_ARCHITECTURES "Fermi") -endif() -list(APPEND CUDA_KNOWN_GPU_ARCHITECTURES "Kepler" "Maxwell") +set(CUDA_KNOWN_GPU_ARCHITECTURES "Kepler" "Maxwell") # This list will be used for CUDA_ARCH_NAME = Common option (enabled by default) -set(CUDA_COMMON_GPU_ARCHITECTURES "3.0" "3.5" "5.0") - -if(CUDA_VERSION VERSION_LESS "7.0") - set(CUDA_LIMIT_GPU_ARCHITECTURE "5.2") -endif() +set(CUDA_COMMON_GPU_ARCHITECTURES "3.5" "5.0") # This list is used to filter CUDA archs when autodetecting -set(CUDA_ALL_GPU_ARCHITECTURES "3.0" "3.2" "3.5" "5.0") - -if(CUDA_VERSION VERSION_EQUAL "7.0" OR CUDA_VERSION VERSION_GREATER "7.0") - list(APPEND CUDA_KNOWN_GPU_ARCHITECTURES "Kepler+Tegra" "Kepler+Tesla" "Maxwell+Tegra") - list(APPEND CUDA_COMMON_GPU_ARCHITECTURES "5.2") - - if(CUDA_VERSION VERSION_LESS "8.0") - list(APPEND CUDA_COMMON_GPU_ARCHITECTURES "5.2+PTX") - set(CUDA_LIMIT_GPU_ARCHITECTURE "6.0") - endif() -endif() +set(CUDA_ALL_GPU_ARCHITECTURES "3.5" "5.0") if(CUDA_VERSION VERSION_EQUAL "8.0" OR CUDA_VERSION VERSION_GREATER "8.0") list(APPEND CUDA_KNOWN_GPU_ARCHITECTURES "Pascal") list(APPEND CUDA_COMMON_GPU_ARCHITECTURES "6.0" "6.1") list(APPEND CUDA_ALL_GPU_ARCHITECTURES "6.0" "6.1" "6.2") - + if(CUDA_VERSION VERSION_LESS "9.0") list(APPEND CUDA_COMMON_GPU_ARCHITECTURES "6.1+PTX") set(CUDA_LIMIT_GPU_ARCHITECTURE "7.0") @@ -101,22 +81,58 @@ if(CUDA_VERSION VERSION_EQUAL "9.0" OR CUDA_VERSION VERSION_GREATER "9.0") list(APPEND CUDA_KNOWN_GPU_ARCHITECTURES "Volta") list(APPEND CUDA_COMMON_GPU_ARCHITECTURES "7.0" "7.0+PTX") list(APPEND CUDA_ALL_GPU_ARCHITECTURES "7.0" "7.0+PTX" "7.2" "7.2+PTX") - if(CUDA_VERSION VERSION_LESS "10.0") set(CUDA_LIMIT_GPU_ARCHITECTURE "8.0") endif() endif() +if(CUDA_VERSION VERSION_GREATER "10.5") + list(APPEND CUDA_KNOWN_GPU_ARCHITECTURES "Ampere") + list(APPEND CUDA_COMMON_GPU_ARCHITECTURES "8.0") + list(APPEND CUDA_ALL_GPU_ARCHITECTURES "8.0") + + if(CUDA_VERSION VERSION_LESS "11.1") + set(CUDA_LIMIT_GPU_ARCHITECTURE "8.0") + list(APPEND CUDA_COMMON_GPU_ARCHITECTURES "8.0+PTX") + endif() +endif() + if(CUDA_VERSION VERSION_EQUAL "10.0" OR CUDA_VERSION VERSION_GREATER "10.0") list(APPEND CUDA_KNOWN_GPU_ARCHITECTURES "Turing") list(APPEND CUDA_COMMON_GPU_ARCHITECTURES "7.5" "7.5+PTX") list(APPEND CUDA_ALL_GPU_ARCHITECTURES "7.5" "7.5+PTX") - + if(CUDA_VERSION VERSION_LESS "11.0") set(CUDA_LIMIT_GPU_ARCHITECTURE "9.0") endif() endif() +if(NOT CUDA_VERSION VERSION_LESS "11.1") + list(APPEND CUDA_COMMON_GPU_ARCHITECTURES "8.6") + list(APPEND CUDA_ALL_GPU_ARCHITECTURES "8.6") + set(CUDA_LIMIT_GPU_ARCHITECUTRE "8.6") + + if(CUDA_VERSION VERSION_LESS "11.8") + set(CUDA_LIMIT_GPU_ARCHITECTURE "8.9") + list(APPEND CUDA_COMMON_GPU_ARCHITECTURES "8.6+PTX") + endif() +endif() + +if(NOT CUDA_VERSION VERSION_LESS "11.8") + list(APPEND CUDA_KNOWN_GPU_ARCHITECTURES "Ada") + list(APPEND CUDA_KNOWN_GPU_ARCHITECTURES "Hopper") + list(APPEND CUDA_COMMON_GPU_ARCHITECTURES "8.9") + list(APPEND CUDA_COMMON_GPU_ARCHITECTURES "9.0") + list(APPEND CUDA_ALL_GPU_ARCHITECTURES "8.9") + list(APPEND CUDA_ALL_GPU_ARCHITECTURES "9.0") + + if(CUDA_VERSION VERSION_LESS "12.0") + set(CUDA_LIMIT_GPU_ARCHITECTURE "9.0") + list(APPEND CUDA_COMMON_GPU_ARCHITECTURES "8.9+PTX") + list(APPEND CUDA_COMMON_GPU_ARCHITECTURES "9.0+PTX") + endif() +endif() + ################################################################################################ # A function for automatic detection of GPUs installed (if autodetection is enabled) # Usage: @@ -175,7 +191,8 @@ function(CUDA_DETECT_INSTALLED_GPUS OUT_VARIABLE) set(CUDA_GPU_DETECT_OUTPUT_FILTERED "") separate_arguments(CUDA_GPU_DETECT_OUTPUT) foreach(ITEM IN ITEMS ${CUDA_GPU_DETECT_OUTPUT}) - if(CUDA_LIMIT_GPU_ARCHITECTURE AND (ITEM VERSION_EQUAL CUDA_LIMIT_GPU_ARCHITECTURE OR ITEM VERSION_GREATER CUDA_LIMIT_GPU_ARCHITECTURE)) + if(CUDA_LIMIT_GPU_ARCHITECTURE AND (ITEM VERSION_GREATER CUDA_LIMIT_GPU_ARCHITECTURE OR + ITEM VERSION_EQUAL CUDA_LIMIT_GPU_ARCHITECTURE)) list(GET CUDA_COMMON_GPU_ARCHITECTURES -1 NEWITEM) string(APPEND CUDA_GPU_DETECT_OUTPUT_FILTERED " ${NEWITEM}") else() @@ -228,14 +245,10 @@ function(CUDA_SELECT_NVCC_ARCH_FLAGS out_variable) set(arch_ptx ${arch_bin}) else() # Look for it in our list of known architectures - if(${arch_name} STREQUAL "Fermi") - set(arch_bin 2.0 "2.1(2.0)") - elseif(${arch_name} STREQUAL "Kepler+Tegra") - set(arch_bin 3.2) - elseif(${arch_name} STREQUAL "Kepler+Tesla") + if(${arch_name} STREQUAL "Kepler+Tesla") set(arch_bin 3.7) elseif(${arch_name} STREQUAL "Kepler") - set(arch_bin 3.0 3.5) + set(arch_bin 3.5) set(arch_ptx 3.5) elseif(${arch_name} STREQUAL "Maxwell+Tegra") set(arch_bin 5.3) @@ -245,12 +258,25 @@ function(CUDA_SELECT_NVCC_ARCH_FLAGS out_variable) elseif(${arch_name} STREQUAL "Pascal") set(arch_bin 6.0 6.1) set(arch_ptx 6.1) + elseif(${arch_name} STREQUAL "Volta+Tegra") + set(arch_bin 7.2) elseif(${arch_name} STREQUAL "Volta") set(arch_bin 7.0 7.0) set(arch_ptx 7.0) elseif(${arch_name} STREQUAL "Turing") set(arch_bin 7.5) set(arch_ptx 7.5) + elseif(${arch_name} STREQUAL "Ampere+Tegra") + set(arch_bin 8.7) + elseif(${arch_name} STREQUAL "Ampere") + set(arch_bin 8.0 8.6) + set(arch_ptx 8.0 8.6) + elseif(${arch_name} STREQUAL "Ada") + set(arch_bin 8.9) + set(arch_ptx 8.9) + elseif(${arch_name} STREQUAL "Hopper") + set(arch_bin 9.0) + set(arch_ptx 9.0) else() message(SEND_ERROR "Unknown CUDA Architecture Name ${arch_name} in CUDA_SELECT_NVCC_ARCH_FLAGS") endif() @@ -282,17 +308,20 @@ function(CUDA_SELECT_NVCC_ARCH_FLAGS out_variable) set(nvcc_flags "") set(nvcc_archs_readable "") + set(nvcc_archs_code "") # Tell NVCC to add binaries for the specified GPUs foreach(arch ${cuda_arch_bin}) if(arch MATCHES "([0-9]+)\\(([0-9]+)\\)") # User explicitly specified ARCH for the concrete CODE - list(APPEND nvcc_flags " -gencode arch=compute_${CMAKE_MATCH_2},code=sm_${CMAKE_MATCH_1}") + list(APPEND nvcc_flags -gencode arch=compute_${CMAKE_MATCH_2},code=sm_${CMAKE_MATCH_1}) list(APPEND nvcc_archs_readable sm_${CMAKE_MATCH_1}) + list(APPEND nvcc_archs_code ${CMAKE_MATCH_1}) else() # User didn't explicitly specify ARCH for the concrete CODE, we assume ARCH=CODE - list(APPEND nvcc_flags " -gencode arch=compute_${arch},code=sm_${arch}") + list(APPEND nvcc_flags -gencode arch=compute_${arch},code=sm_${arch}) list(APPEND nvcc_archs_readable sm_${arch}) + list(APPEND nvcc_archs_code ${arch}) endif() endforeach() @@ -305,4 +334,5 @@ function(CUDA_SELECT_NVCC_ARCH_FLAGS out_variable) string(REPLACE ";" " " nvcc_archs_readable "${nvcc_archs_readable}") set(${out_variable} ${nvcc_flags} PARENT_SCOPE) set(${out_variable}_readable ${nvcc_archs_readable} PARENT_SCOPE) -endfunction() + set(${out_variable}_readable_code ${nvcc_archs_code} PARENT_SCOPE) +endfunction() \ No newline at end of file diff --git a/source/backend/cuda/execution/CutlassGemmParam.hpp b/source/backend/cuda/execution/CutlassGemmParam.hpp index 383c0dd4..5553de38 100644 --- a/source/backend/cuda/execution/CutlassGemmParam.hpp +++ b/source/backend/cuda/execution/CutlassGemmParam.hpp @@ -215,7 +215,8 @@ using GemmTensor_F16_F16_Linear_AlignCuda_Sm75 = cutlass::gemm::device::Gemm< cutlass::gemm::GemmShape<16, 8, 8>, EpilogueCudaOp_F16_Linear, SwizzleThreadBlock, - NumStages>; + NumStages, + 128 / cutlass::sizeof_bits::value, 128 / cutlass::sizeof_bits::value, true>; using GemmTensor_F16_F16_Linear_AlignTensor_Sm75 = cutlass::gemm::device::Gemm< cutlass::half_t, @@ -232,7 +233,8 @@ using GemmTensor_F16_F16_Linear_AlignTensor_Sm75 = cutlass::gemm::device::Gemm< cutlass::gemm::GemmShape<16, 8, 8>, EpilogueTensorOp_F16_Linear, SwizzleThreadBlock, - NumStages>; + NumStages, + 128 / cutlass::sizeof_bits::value, 128 / cutlass::sizeof_bits::value, true>; using GemmTensor_F16_F32_Linear_AlignCuda_Sm75 = cutlass::gemm::device::Gemm< cutlass::half_t, @@ -249,7 +251,8 @@ using GemmTensor_F16_F32_Linear_AlignCuda_Sm75 = cutlass::gemm::device::Gemm< cutlass::gemm::GemmShape<16, 8, 8>, EpilogueCudaOp_F32_Linear, SwizzleThreadBlock, - NumStages>; + NumStages, + 128 / cutlass::sizeof_bits::value, 128 / cutlass::sizeof_bits::value, true>; using GemmTensor_F16_F32_Linear_AlignTensor_Sm75 = cutlass::gemm::device::Gemm< cutlass::half_t, @@ -266,7 +269,8 @@ using GemmTensor_F16_F32_Linear_AlignTensor_Sm75 = cutlass::gemm::device::Gemm< cutlass::gemm::GemmShape<16, 8, 8>, EpilogueTensorOp_F32_Linear, SwizzleThreadBlock, - NumStages>; + NumStages, + 128 / cutlass::sizeof_bits::value, 128 / cutlass::sizeof_bits::value, true>; using GemmCuda_F32_F32_Linear_AlignCuda = cutlass::gemm::device::Gemm< float, @@ -334,7 +338,8 @@ using GemmTensor_F32_F32_Linear_AlignCuda_Sm75 = cutlass::gemm::device::Gemm< cutlass::gemm::GemmShape<16, 8, 8>, EpilogueCudaOp_F32_Linear, SwizzleThreadBlock, - NumStages>; + NumStages, + 128 / cutlass::sizeof_bits::value, 128 / cutlass::sizeof_bits::value, true>; using GemmTensor_F32_F32_Linear_AlignTensor_Sm75 = cutlass::gemm::device::Gemm< float, @@ -351,7 +356,8 @@ using GemmTensor_F32_F32_Linear_AlignTensor_Sm75 = cutlass::gemm::device::Gemm< cutlass::gemm::GemmShape<16, 8, 8>, EpilogueTensorOp_F32_Linear, SwizzleThreadBlock, - NumStages>; + NumStages, + 128 / cutlass::sizeof_bits::value, 128 / cutlass::sizeof_bits::value, true>; using GemmCuda_F16_F16_Relu_AlignCuda = cutlass::gemm::device::Gemm< cutlass::half_t, @@ -470,7 +476,8 @@ using GemmTensor_F16_F16_Relu_AlignCuda_Sm75 = cutlass::gemm::device::Gemm< cutlass::gemm::GemmShape<16, 8, 8>, EpilogueCudaOp_F16_Relu, SwizzleThreadBlock, - NumStages>; + NumStages, + 128 / cutlass::sizeof_bits::value, 128 / cutlass::sizeof_bits::value, true>; using GemmTensor_F16_F16_Relu_AlignTensor_Sm75 = cutlass::gemm::device::Gemm< cutlass::half_t, @@ -487,7 +494,8 @@ using GemmTensor_F16_F16_Relu_AlignTensor_Sm75 = cutlass::gemm::device::Gemm< cutlass::gemm::GemmShape<16, 8, 8>, EpilogueTensorOp_F16_Relu, SwizzleThreadBlock, - NumStages>; + NumStages, + 128 / cutlass::sizeof_bits::value, 128 / cutlass::sizeof_bits::value, true>; using GemmTensor_F16_F32_Relu_AlignCuda_Sm75 = cutlass::gemm::device::Gemm< cutlass::half_t, @@ -504,7 +512,8 @@ using GemmTensor_F16_F32_Relu_AlignCuda_Sm75 = cutlass::gemm::device::Gemm< cutlass::gemm::GemmShape<16, 8, 8>, EpilogueCudaOp_F32_Relu, SwizzleThreadBlock, - NumStages>; + NumStages, + 128 / cutlass::sizeof_bits::value, 128 / cutlass::sizeof_bits::value, true>; using GemmTensor_F16_F32_Relu_AlignTensor_Sm75 = cutlass::gemm::device::Gemm< cutlass::half_t, @@ -521,7 +530,8 @@ using GemmTensor_F16_F32_Relu_AlignTensor_Sm75 = cutlass::gemm::device::Gemm< cutlass::gemm::GemmShape<16, 8, 8>, EpilogueTensorOp_F32_Relu, SwizzleThreadBlock, - NumStages>; + NumStages, + 128 / cutlass::sizeof_bits::value, 128 / cutlass::sizeof_bits::value, true>; using GemmCuda_F32_F32_Relu_AlignCuda = cutlass::gemm::device::Gemm< float, @@ -589,7 +599,8 @@ using GemmTensor_F32_F32_Relu_AlignCuda_Sm75 = cutlass::gemm::device::Gemm< cutlass::gemm::GemmShape<16, 8, 8>, EpilogueCudaOp_F32_Relu, SwizzleThreadBlock, - NumStages>; + NumStages, + 128 / cutlass::sizeof_bits::value, 128 / cutlass::sizeof_bits::value, true>; using GemmTensor_F32_F32_Relu_AlignTensor_Sm75 = cutlass::gemm::device::Gemm< float, @@ -606,7 +617,8 @@ using GemmTensor_F32_F32_Relu_AlignTensor_Sm75 = cutlass::gemm::device::Gemm< cutlass::gemm::GemmShape<16, 8, 8>, EpilogueTensorOp_F32_Relu, SwizzleThreadBlock, - NumStages>; + NumStages, + 128 / cutlass::sizeof_bits::value, 128 / cutlass::sizeof_bits::value, true>; using GemmCuda_F16_F16_Relu6_AlignCuda = cutlass::gemm::device::Gemm< cutlass::half_t, @@ -725,7 +737,8 @@ using GemmTensor_F16_F16_Relu6_AlignCuda_Sm75 = cutlass::gemm::device::Gemm< cutlass::gemm::GemmShape<16, 8, 8>, EpilogueCudaOp_F16_Relu6, SwizzleThreadBlock, - NumStages>; + NumStages, + 128 / cutlass::sizeof_bits::value, 128 / cutlass::sizeof_bits::value, true>; using GemmTensor_F16_F16_Relu6_AlignTensor_Sm75 = cutlass::gemm::device::Gemm< cutlass::half_t, @@ -742,7 +755,8 @@ using GemmTensor_F16_F16_Relu6_AlignTensor_Sm75 = cutlass::gemm::device::Gemm< cutlass::gemm::GemmShape<16, 8, 8>, EpilogueTensorOp_F16_Relu6, SwizzleThreadBlock, - NumStages>; + NumStages, + 128 / cutlass::sizeof_bits::value, 128 / cutlass::sizeof_bits::value, true>; using GemmTensor_F16_F32_Relu6_AlignCuda_Sm75 = cutlass::gemm::device::Gemm< cutlass::half_t, @@ -759,7 +773,8 @@ using GemmTensor_F16_F32_Relu6_AlignCuda_Sm75 = cutlass::gemm::device::Gemm< cutlass::gemm::GemmShape<16, 8, 8>, EpilogueCudaOp_F32_Relu6, SwizzleThreadBlock, - NumStages>; + NumStages, + 128 / cutlass::sizeof_bits::value, 128 / cutlass::sizeof_bits::value, true>; using GemmTensor_F16_F32_Relu6_AlignTensor_Sm75 = cutlass::gemm::device::Gemm< cutlass::half_t, @@ -776,7 +791,8 @@ using GemmTensor_F16_F32_Relu6_AlignTensor_Sm75 = cutlass::gemm::device::Gemm< cutlass::gemm::GemmShape<16, 8, 8>, EpilogueTensorOp_F32_Relu6, SwizzleThreadBlock, - NumStages>; + NumStages, + 128 / cutlass::sizeof_bits::value, 128 / cutlass::sizeof_bits::value, true>; using GemmCuda_F32_F32_Relu6_AlignCuda = cutlass::gemm::device::Gemm< float, @@ -844,7 +860,8 @@ using GemmTensor_F32_F32_Relu6_AlignCuda_Sm75 = cutlass::gemm::device::Gemm< cutlass::gemm::GemmShape<16, 8, 8>, EpilogueCudaOp_F32_Relu6, SwizzleThreadBlock, - NumStages>; + NumStages, + 128 / cutlass::sizeof_bits::value, 128 / cutlass::sizeof_bits::value, true>; using GemmTensor_F32_F32_Relu6_AlignTensor_Sm75 = cutlass::gemm::device::Gemm< float, @@ -861,8 +878,9 @@ using GemmTensor_F32_F32_Relu6_AlignTensor_Sm75 = cutlass::gemm::device::Gemm< cutlass::gemm::GemmShape<16, 8, 8>, EpilogueTensorOp_F32_Relu6, SwizzleThreadBlock, - NumStages>; + NumStages, + 128 / cutlass::sizeof_bits::value, 128 / cutlass::sizeof_bits::value, true>; } } -#endif \ No newline at end of file +#endif diff --git a/source/backend/cuda/execution/LayerNormExecution.cu b/source/backend/cuda/execution/LayerNormExecution.cu index 2467df1c..3689ad53 100644 --- a/source/backend/cuda/execution/LayerNormExecution.cu +++ b/source/backend/cuda/execution/LayerNormExecution.cu @@ -4,36 +4,6 @@ namespace CUDA { #define CUDA_KERNEL_LOOP(i, n) for (int i = blockIdx.x * blockDim.x + threadIdx.x; i < (n); i += blockDim.x * gridDim.x) -#define FINAL_MASK 0xffffffff - -template -__inline__ __device__ -T warpReduceSum(T val) -{ - for(int mask = 16; mask > 0; mask >>= 1) - val += __shfl_xor_sync(FINAL_MASK, val, mask, 32); - return val; -} - -template -__inline__ __device__ -T blockReduceSum(T val) -{ - static __shared__ T shared[32]; - int lane = threadIdx.x & 0x1f; - int wid = threadIdx.x >> 5; - - val = warpReduceSum(val); - - if(lane == 0) - shared[wid] = val; - __syncthreads(); - - val = (threadIdx.x < (blockDim.x >> 5 )) ? shared[lane] : (T)0.0f; - val = warpReduceSum(val); - return val; -} - template __global__ void input_layernorm(T* out, const T* input, const float* gamma, const float* beta, int m, int n, const float epsilon, int sumPerKnl) diff --git a/source/backend/cuda/execution/LayerNormExecution.hpp b/source/backend/cuda/execution/LayerNormExecution.hpp index 4815b8cf..a741f8f8 100644 --- a/source/backend/cuda/execution/LayerNormExecution.hpp +++ b/source/backend/cuda/execution/LayerNormExecution.hpp @@ -10,7 +10,7 @@ #define LayerNormExecution_hpp #include "core/Execution.hpp" - +#include "MNNCUDAFunction.cuh" #include #include "backend/cuda/core/CUDABackend.hpp" diff --git a/source/backend/cuda/execution/MNNCUDAFunction.cuh b/source/backend/cuda/execution/MNNCUDAFunction.cuh index 9585d60c..6386453c 100644 --- a/source/backend/cuda/execution/MNNCUDAFunction.cuh +++ b/source/backend/cuda/execution/MNNCUDAFunction.cuh @@ -1,6 +1,8 @@ #ifndef MNNCUDAFunction_cuh #define MNNCUDAFunction_cuh +#include + struct DivModFast { DivModFast(int d = 1) { @@ -35,4 +37,68 @@ struct DivModFast { uint32_t l_; // ceil(log2(d_)) uint32_t m_; // m' in the papaer }; + + +#define FINAL_MASK 0xffffffff + +template +__inline__ __device__ +T warpReduceSum(T val) +{ + for(int mask = 16; mask > 0; mask >>= 1) { + val += __shfl_xor_sync(FINAL_MASK, val, mask, 32); + } + return val; +} + +template +__inline__ __device__ +T blockReduceSum(T val) +{ + static __shared__ T shared[32]; + int lane = threadIdx.x & 0x1f; + int wid = threadIdx.x >> 5; + + val = warpReduceSum(val); + + if(lane == 0) { + shared[wid] = val; + } + __syncthreads(); + + val = (threadIdx.x < (blockDim.x >> 5 )) ? shared[lane] : (T)0.0f; + val = warpReduceSum(val); + return val; +} + +template +__inline__ __device__ +T warpReduceMax(T val) +{ + for(int mask = 16; mask > 0; mask >>= 1) { + val = max(val, __shfl_xor_sync(FINAL_MASK, val, mask, 32)); + } + return val; +} + +template +__inline__ __device__ +T blockReduceMax(T val) +{ + static __shared__ T shared[32]; + int lane = threadIdx.x & 0x1f; + int wid = threadIdx.x >> 5; + + val = warpReduceMax(val); + + if(lane == 0) { + shared[wid] = val; + } + __syncthreads(); + + val = (threadIdx.x < (blockDim.x >> 5 )) ? shared[lane] : (T)0.0f; + val = warpReduceMax(val); + return val; +} + #endif \ No newline at end of file diff --git a/source/backend/cuda/execution/MatMulExecution.cu b/source/backend/cuda/execution/MatMulExecution.cu index a128dc06..5cd13dd0 100644 --- a/source/backend/cuda/execution/MatMulExecution.cu +++ b/source/backend/cuda/execution/MatMulExecution.cu @@ -425,59 +425,109 @@ void MatMulExecution::setArguments(const std::vector &inputs, const st cutlass_check(status); } else { if(hAlignment) { - typename GemmBatchedTensor_F16_F16_Linear_AlignTensor_Row_Column_Sm75::Arguments arguments{problem_size, // <- problem size of matrix multiplication - {(ElementInput_F16 *)mTempMatA, mGemmInfo.elhPad[1]}, // Ptr + ldm - (int64_t)(mGemmInfo.elh[0] * mGemmInfo.elhPad[1]* mAs), // batch_stride_A - {(ElementInput_F16 *)mTempMatB, mGemmInfo.elhPad[1]}, // Ptr + ldm - (int64_t)(mGemmInfo.elhPad[1] * mGemmInfo.elh[2]* mBs), // batch_stride_B - {(ElementOutput_F16 *)mBiasPtr, 0}, // Ptr + ldm if ldm = 0, vector, - (int64_t)(0), // batch_stride_bias - {(ElementOutput_F16 *)C->deviceId(), mGemmInfo.elh[2]}, // Ptr + ldm - (int64_t)(mGemmInfo.elh[0] * mGemmInfo.elh[2]), // batch_stride_C - {alpha, beta}, // <- tuple of alpha and beta - mBatch}; // batch_count + if(mConvertGemmSplitK) { + int split_k_slices = 16; + typename GemmTensor_F16_F16_Linear_AlignTensor_Sm75::Arguments arguments{problem_size, // <- problem size of matrix multiplication + {(ElementInput_F16 *)mTempMatA, mGemmInfo.elhPad[1]}, // Ptr + ldm + {(ElementInput_F16 *)mTempMatB, mGemmInfo.elhPad[1]}, // Ptr + ldm + {(ElementOutput_F16 *)mBiasPtr, 0}, // Ptr + ldm if ldm = 0, vector, + {(ElementOutput_F16 *)C->deviceId(), mGemmInfo.elh[2]}, // Ptr + ldm + {alpha, beta}, // <- tuple of alpha and beta + split_k_slices}; // <- k-dimension split factor + size_t workspace_size = GemmTensor_F16_F16_Linear_AlignTensor_Sm75::get_workspace_size(arguments); - size_t workspace_size = GemmBatchedTensor_F16_F16_Linear_AlignTensor_Row_Column_Sm75::get_workspace_size(arguments); + if(workspace_size != 0) { + workspaceTensor.reset(Tensor::createDevice({(int)workspace_size})); + mBackend->onAcquireBuffer(workspaceTensor.get(), Backend::STATIC); + mWorkspace = (void *)workspaceTensor.get()->buffer().device; + } - if(workspace_size != 0) { - workspaceTensor.reset(Tensor::createDevice({(int)workspace_size})); - mBackend->onAcquireBuffer(workspaceTensor.get(), Backend::STATIC); - mWorkspace = (void *)workspaceTensor.get()->buffer().device; + cutlass::Status status = mGemmF16F16LnAlign8Sm75.can_implement(arguments); + cutlass_check(status); + + // Initialize CUTLASS kernel with arguments and workspace pointer + status = mGemmF16F16LnAlign8Sm75.initialize(arguments, (uint8_t *)mWorkspace); + cutlass_check(status); + } else { + typename GemmBatchedTensor_F16_F16_Linear_AlignTensor_Row_Column_Sm75::Arguments arguments{problem_size, // <- problem size of matrix multiplication + {(ElementInput_F16 *)mTempMatA, mGemmInfo.elhPad[1]}, // Ptr + ldm + (int64_t)(mGemmInfo.elh[0] * mGemmInfo.elhPad[1]* mAs), // batch_stride_A + {(ElementInput_F16 *)mTempMatB, mGemmInfo.elhPad[1]}, // Ptr + ldm + (int64_t)(mGemmInfo.elhPad[1] * mGemmInfo.elh[2]* mBs), // batch_stride_B + {(ElementOutput_F16 *)mBiasPtr, 0}, // Ptr + ldm if ldm = 0, vector, + (int64_t)(0), // batch_stride_bias + {(ElementOutput_F16 *)C->deviceId(), mGemmInfo.elh[2]}, // Ptr + ldm + (int64_t)(mGemmInfo.elh[0] * mGemmInfo.elh[2]), // batch_stride_C + {alpha, beta}, // <- tuple of alpha and beta + mBatch}; // batch_count + + size_t workspace_size = GemmBatchedTensor_F16_F16_Linear_AlignTensor_Row_Column_Sm75::get_workspace_size(arguments); + + if(workspace_size != 0) { + workspaceTensor.reset(Tensor::createDevice({(int)workspace_size})); + mBackend->onAcquireBuffer(workspaceTensor.get(), Backend::STATIC); + mWorkspace = (void *)workspaceTensor.get()->buffer().device; + } + // Check the problem size is supported or not + cutlass::Status status = mGemmBatchedF16F16LnAlign8RCSm75.can_implement(arguments); + cutlass_check(status); + + // Initialize CUTLASS kernel with arguments and workspace pointer + status = mGemmBatchedF16F16LnAlign8RCSm75.initialize(arguments, (uint8_t *)mWorkspace); + cutlass_check(status); } - // Check the problem size is supported or not - cutlass::Status status = mGemmBatchedF16F16LnAlign8RCSm75.can_implement(arguments); - cutlass_check(status); - - // Initialize CUTLASS kernel with arguments and workspace pointer - status = mGemmBatchedF16F16LnAlign8RCSm75.initialize(arguments, (uint8_t *)mWorkspace); - cutlass_check(status); } else { - typename GemmBatchedTensor_F16_F16_Linear_AlignCuda_Row_Column_Sm75::Arguments arguments{problem_size, // <- problem size of matrix multiplication - {(ElementInput_F16 *)mTempMatA, mGemmInfo.elhPad[1]}, // Ptr + ldm - (int64_t)(mGemmInfo.elh[0] * mGemmInfo.elhPad[1]* mAs), // batch_stride_A - {(ElementInput_F16 *)mTempMatB, mGemmInfo.elhPad[1]}, // Ptr + ldm - (int64_t)(mGemmInfo.elhPad[1] * mGemmInfo.elh[2]* mBs), // batch_stride_B - {(ElementOutput_F16 *)mBiasPtr, 0}, // Ptr + ldm if ldm = 0, vector, - (int64_t)(0), // batch_stride_bias - {(ElementOutput_F16 *)C->deviceId(), mGemmInfo.elh[2]}, // Ptr + ldm - (int64_t)(mGemmInfo.elh[0] * mGemmInfo.elh[2]), // batch_stride_C - {alpha, beta}, // <- tuple of alpha and beta - mBatch}; // batch_count + if(mConvertGemmSplitK) { + int split_k_slices = 16; + typename GemmTensor_F16_F16_Linear_AlignCuda_Sm75::Arguments arguments{problem_size, // <- problem size of matrix multiplication + {(ElementInput_F16 *)mTempMatA, mGemmInfo.elhPad[1]}, // Ptr + ldm + {(ElementInput_F16 *)mTempMatB, mGemmInfo.elhPad[1]}, // Ptr + ldm + {(ElementOutput_F16 *)mBiasPtr, 0}, // Ptr + ldm if ldm = 0, vector, + {(ElementOutput_F16 *)C->deviceId(), mGemmInfo.elh[2]}, // Ptr + ldm + {alpha, beta}, // <- tuple of alpha and beta + split_k_slices}; // <- k-dimension split factor + size_t workspace_size = GemmTensor_F16_F16_Linear_AlignCuda_Sm75::get_workspace_size(arguments); - size_t workspace_size = GemmBatchedTensor_F16_F16_Linear_AlignCuda_Row_Column_Sm75::get_workspace_size(arguments); + if(workspace_size != 0) { + workspaceTensor.reset(Tensor::createDevice({(int)workspace_size})); + mBackend->onAcquireBuffer(workspaceTensor.get(), Backend::STATIC); + mWorkspace = (void *)workspaceTensor.get()->buffer().device; + } - if(workspace_size != 0) { - workspaceTensor.reset(Tensor::createDevice({(int)workspace_size})); - mBackend->onAcquireBuffer(workspaceTensor.get(), Backend::STATIC); - mWorkspace = (void *)workspaceTensor.get()->buffer().device; + cutlass::Status status = mGemmF16F16LnAlign1Sm75.can_implement(arguments); + cutlass_check(status); + + // Initialize CUTLASS kernel with arguments and workspace pointer + status = mGemmF16F16LnAlign1Sm75.initialize(arguments, (uint8_t *)mWorkspace); + cutlass_check(status); + } else { + typename GemmBatchedTensor_F16_F16_Linear_AlignCuda_Row_Column_Sm75::Arguments arguments{problem_size, // <- problem size of matrix multiplication + {(ElementInput_F16 *)mTempMatA, mGemmInfo.elhPad[1]}, // Ptr + ldm + (int64_t)(mGemmInfo.elh[0] * mGemmInfo.elhPad[1]* mAs), // batch_stride_A + {(ElementInput_F16 *)mTempMatB, mGemmInfo.elhPad[1]}, // Ptr + ldm + (int64_t)(mGemmInfo.elhPad[1] * mGemmInfo.elh[2]* mBs), // batch_stride_B + {(ElementOutput_F16 *)mBiasPtr, 0}, // Ptr + ldm if ldm = 0, vector, + (int64_t)(0), // batch_stride_bias + {(ElementOutput_F16 *)C->deviceId(), mGemmInfo.elh[2]}, // Ptr + ldm + (int64_t)(mGemmInfo.elh[0] * mGemmInfo.elh[2]), // batch_stride_C + {alpha, beta}, // <- tuple of alpha and beta + mBatch}; // batch_count + + size_t workspace_size = GemmBatchedTensor_F16_F16_Linear_AlignCuda_Row_Column_Sm75::get_workspace_size(arguments); + + if(workspace_size != 0) { + workspaceTensor.reset(Tensor::createDevice({(int)workspace_size})); + mBackend->onAcquireBuffer(workspaceTensor.get(), Backend::STATIC); + mWorkspace = (void *)workspaceTensor.get()->buffer().device; + } + // Check the problem size is supported or not + cutlass::Status status = mGemmBatchedF16F16LnAlign1RCSm75.can_implement(arguments); + cutlass_check(status); + + // Initialize CUTLASS kernel with arguments and workspace pointer + status = mGemmBatchedF16F16LnAlign1RCSm75.initialize(arguments, (uint8_t *)mWorkspace); + cutlass_check(status); } - // Check the problem size is supported or not - cutlass::Status status = mGemmBatchedF16F16LnAlign1RCSm75.can_implement(arguments); - cutlass_check(status); - - // Initialize CUTLASS kernel with arguments and workspace pointer - status = mGemmBatchedF16F16LnAlign1RCSm75.initialize(arguments, (uint8_t *)mWorkspace); - cutlass_check(status); } } @@ -541,63 +591,31 @@ void MatMulExecution::setArguments(const std::vector &inputs, const st } else { if(hAlignment) { if(mNeedConvertMatAB) { - typename GemmBatchedTensor_F16_F32_Linear_AlignTensor_Row_Column_Sm75::Arguments arguments{problem_size, // <- problem size of matrix multiplication - {(ElementInput_F16 *)mTempMatA, mGemmInfo.elhPad[1]}, // Ptr + ldm - (int64_t)(mGemmInfo.elh[0] * mGemmInfo.elhPad[1]* mAs), // batch_stride_A - {(ElementInput_F16 *)mTempMatB, mGemmInfo.elhPad[1]}, // Ptr + ldm - (int64_t)(mGemmInfo.elhPad[1] * mGemmInfo.elh[2]* mBs), // batch_stride_B - {(ElementOutput_F32 *)mBiasPtr, 0}, // Ptr + ldm if ldm = 0, vector, - (int64_t)(0), // batch_stride_bias - {(ElementOutput_F32 *)C->deviceId(), mGemmInfo.elh[2]}, // Ptr + ldm - (int64_t)(mGemmInfo.elh[0] * mGemmInfo.elh[2]), // batch_stride_C - {alpha, beta}, // <- tuple of alpha and beta - mBatch}; // batch_count - - size_t workspace_size = GemmBatchedTensor_F16_F32_Linear_AlignTensor_Row_Column_Sm75::get_workspace_size(arguments); - - if(workspace_size != 0) { - workspaceTensor.reset(Tensor::createDevice({(int)workspace_size})); - mBackend->onAcquireBuffer(workspaceTensor.get(), Backend::STATIC); - mWorkspace = (void *)workspaceTensor.get()->buffer().device; - } - // Check the problem size is supported or not - cutlass::Status status = mGemmBatchedF16F32LnAlign8RCSm75.can_implement(arguments); - cutlass_check(status); - - // Initialize CUTLASS kernel with arguments and workspace pointer - status = mGemmBatchedF16F32LnAlign8RCSm75.initialize(arguments, (uint8_t *)mWorkspace); - cutlass_check(status); - } else { - typename GemmBatchedTensor_F32_F32_Linear_AlignTensor_Row_Column_Sm75::Arguments arguments{problem_size, // <- problem size of matrix multiplication - {(ElementInput_F32 *)mTempMatA, mGemmInfo.elhPad[1]}, // Ptr + ldm - (int64_t)(mGemmInfo.elh[0] * mGemmInfo.elhPad[1]* mAs), // batch_stride_A - {(ElementInput_F32 *)mTempMatB, mGemmInfo.elhPad[1]}, // Ptr + ldm - (int64_t)(mGemmInfo.elhPad[1] * mGemmInfo.elh[2]* mBs), // batch_stride_B - {(ElementOutput_F32 *)mBiasPtr, 0}, // Ptr + ldm if ldm = 0, vector, - (int64_t)(0), // batch_stride_bias - {(ElementOutput_F32 *)C->deviceId(), mGemmInfo.elh[2]}, // Ptr + ldm - (int64_t)(mGemmInfo.elh[0] * mGemmInfo.elh[2]), // batch_stride_C - {alpha, beta}, // <- tuple of alpha and beta - mBatch}; // batch_count - - size_t workspace_size = GemmBatchedTensor_F32_F32_Linear_AlignTensor_Row_Column_Sm75::get_workspace_size(arguments); - - if(workspace_size != 0) { - workspaceTensor.reset(Tensor::createDevice({(int)workspace_size})); - mBackend->onAcquireBuffer(workspaceTensor.get(), Backend::STATIC); - mWorkspace = (void *)workspaceTensor.get()->buffer().device; - } - // Check the problem size is supported or not - cutlass::Status status = mGemmBatchedF32F32LnAlign8RCSm75.can_implement(arguments); - cutlass_check(status); - - // Initialize CUTLASS kernel with arguments and workspace pointer - status = mGemmBatchedF32F32LnAlign8RCSm75.initialize(arguments, (uint8_t *)mWorkspace); - cutlass_check(status); - } - } else { - if(mNeedConvertMatAB) { - typename GemmBatchedTensor_F16_F32_Linear_AlignCuda_Row_Column_Sm75::Arguments arguments{problem_size, // <- problem size of matrix multiplication + if(mConvertGemmSplitK) { + int split_k_slices = 16; + typename GemmTensor_F16_F32_Linear_AlignTensor_Sm75::Arguments arguments{problem_size, // <- problem size of matrix multiplication + {(ElementInput_F16 *)mTempMatA, mGemmInfo.elhPad[1]}, // Ptr + ldm + {(ElementInput_F16 *)mTempMatB, mGemmInfo.elhPad[1]}, // Ptr + ldm + {(ElementOutput_F32 *)mBiasPtr, 0}, // Ptr + ldm if ldm = 0, vector, + {(ElementOutput_F32 *)C->deviceId(), mGemmInfo.elh[2]}, // Ptr + ldm + {alpha, beta}, // <- tuple of alpha and beta + split_k_slices}; // <- k-dimension split factor + size_t workspace_size = GemmTensor_F16_F32_Linear_AlignTensor_Sm75::get_workspace_size(arguments); + + if(workspace_size != 0) { + workspaceTensor.reset(Tensor::createDevice({(int)workspace_size})); + mBackend->onAcquireBuffer(workspaceTensor.get(), Backend::STATIC); + mWorkspace = (void *)workspaceTensor.get()->buffer().device; + } + + cutlass::Status status = mGemmF16F32LnAlign8Sm75.can_implement(arguments); + cutlass_check(status); + + // Initialize CUTLASS kernel with arguments and workspace pointer + status = mGemmF16F32LnAlign8Sm75.initialize(arguments, (uint8_t *)mWorkspace); + cutlass_check(status); + } else { + typename GemmBatchedTensor_F16_F32_Linear_AlignTensor_Row_Column_Sm75::Arguments arguments{problem_size, // <- problem size of matrix multiplication {(ElementInput_F16 *)mTempMatA, mGemmInfo.elhPad[1]}, // Ptr + ldm (int64_t)(mGemmInfo.elh[0] * mGemmInfo.elhPad[1]* mAs), // batch_stride_A {(ElementInput_F16 *)mTempMatB, mGemmInfo.elhPad[1]}, // Ptr + ldm @@ -609,47 +627,179 @@ void MatMulExecution::setArguments(const std::vector &inputs, const st {alpha, beta}, // <- tuple of alpha and beta mBatch}; // batch_count - size_t workspace_size = GemmBatchedTensor_F16_F32_Linear_AlignCuda_Row_Column_Sm75::get_workspace_size(arguments); + size_t workspace_size = GemmBatchedTensor_F16_F32_Linear_AlignTensor_Row_Column_Sm75::get_workspace_size(arguments); - if(workspace_size != 0) { - workspaceTensor.reset(Tensor::createDevice({(int)workspace_size})); - mBackend->onAcquireBuffer(workspaceTensor.get(), Backend::STATIC); - mWorkspace = (void *)workspaceTensor.get()->buffer().device; + if(workspace_size != 0) { + workspaceTensor.reset(Tensor::createDevice({(int)workspace_size})); + mBackend->onAcquireBuffer(workspaceTensor.get(), Backend::STATIC); + mWorkspace = (void *)workspaceTensor.get()->buffer().device; + } + // Check the problem size is supported or not + cutlass::Status status = mGemmBatchedF16F32LnAlign8RCSm75.can_implement(arguments); + cutlass_check(status); + + // Initialize CUTLASS kernel with arguments and workspace pointer + status = mGemmBatchedF16F32LnAlign8RCSm75.initialize(arguments, (uint8_t *)mWorkspace); + cutlass_check(status); } - // Check the problem size is supported or not - cutlass::Status status = mGemmBatchedF16F32LnAlign1RCSm75.can_implement(arguments); - cutlass_check(status); - - // Initialize CUTLASS kernel with arguments and workspace pointer - status = mGemmBatchedF16F32LnAlign1RCSm75.initialize(arguments, (uint8_t *)mWorkspace); - cutlass_check(status); } else { - typename GemmBatchedTensor_F32_F32_Linear_AlignCuda_Row_Column_Sm75::Arguments arguments{problem_size, // <- problem size of matrix multiplication - {(ElementInput_F32 *)mTempMatA, mGemmInfo.elhPad[1]}, // Ptr + ldm - (int64_t)(mGemmInfo.elh[0] * mGemmInfo.elhPad[1]* mAs), // batch_stride_A - {(ElementInput_F32 *)mTempMatB, mGemmInfo.elhPad[1]}, // Ptr + ldm - (int64_t)(mGemmInfo.elhPad[1] * mGemmInfo.elh[2]* mBs), // batch_stride_B - {(ElementOutput_F32 *)mBiasPtr, 0}, // Ptr + ldm if ldm = 0, vector, - (int64_t)(0), // batch_stride_bias - {(ElementOutput_F32 *)C->deviceId(), mGemmInfo.elh[2]}, // Ptr + ldm - (int64_t)(mGemmInfo.elh[0] * mGemmInfo.elh[2]), // batch_stride_C - {alpha, beta}, // <- tuple of alpha and beta - mBatch}; // batch_count + if(mConvertGemmSplitK) { + int split_k_slices = 16; + typename GemmTensor_F32_F32_Linear_AlignTensor_Sm75::Arguments arguments{problem_size, // <- problem size of matrix multiplication + {(ElementInput_F32 *)mTempMatA, mGemmInfo.elhPad[1]}, // Ptr + ldm + {(ElementInput_F32 *)mTempMatB, mGemmInfo.elhPad[1]}, // Ptr + ldm + {(ElementOutput_F32 *)mBiasPtr, 0}, // Ptr + ldm if ldm = 0, vector, + {(ElementOutput_F32 *)C->deviceId(), mGemmInfo.elh[2]}, // Ptr + ldm + {alpha, beta}, // <- tuple of alpha and beta + split_k_slices}; // <- k-dimension split factor + size_t workspace_size = GemmTensor_F32_F32_Linear_AlignTensor_Sm75::get_workspace_size(arguments); + + if(workspace_size != 0) { + workspaceTensor.reset(Tensor::createDevice({(int)workspace_size})); + mBackend->onAcquireBuffer(workspaceTensor.get(), Backend::STATIC); + mWorkspace = (void *)workspaceTensor.get()->buffer().device; + } + + cutlass::Status status = mGemmF32F32LnAlign8Sm75.can_implement(arguments); + cutlass_check(status); + + // Initialize CUTLASS kernel with arguments and workspace pointer + status = mGemmF32F32LnAlign8Sm75.initialize(arguments, (uint8_t *)mWorkspace); + cutlass_check(status); + } else { + typename GemmBatchedTensor_F32_F32_Linear_AlignTensor_Row_Column_Sm75::Arguments arguments{problem_size, // <- problem size of matrix multiplication + {(ElementInput_F32 *)mTempMatA, mGemmInfo.elhPad[1]}, // Ptr + ldm + (int64_t)(mGemmInfo.elh[0] * mGemmInfo.elhPad[1]* mAs), // batch_stride_A + {(ElementInput_F32 *)mTempMatB, mGemmInfo.elhPad[1]}, // Ptr + ldm + (int64_t)(mGemmInfo.elhPad[1] * mGemmInfo.elh[2]* mBs), // batch_stride_B + {(ElementOutput_F32 *)mBiasPtr, 0}, // Ptr + ldm if ldm = 0, vector, + (int64_t)(0), // batch_stride_bias + {(ElementOutput_F32 *)C->deviceId(), mGemmInfo.elh[2]}, // Ptr + ldm + (int64_t)(mGemmInfo.elh[0] * mGemmInfo.elh[2]), // batch_stride_C + {alpha, beta}, // <- tuple of alpha and beta + mBatch}; // batch_count - size_t workspace_size = GemmBatchedTensor_F32_F32_Linear_AlignCuda_Row_Column_Sm75::get_workspace_size(arguments); + size_t workspace_size = GemmBatchedTensor_F32_F32_Linear_AlignTensor_Row_Column_Sm75::get_workspace_size(arguments); - if(workspace_size != 0) { - workspaceTensor.reset(Tensor::createDevice({(int)workspace_size})); - mBackend->onAcquireBuffer(workspaceTensor.get(), Backend::STATIC); - mWorkspace = (void *)workspaceTensor.get()->buffer().device; + if(workspace_size != 0) { + workspaceTensor.reset(Tensor::createDevice({(int)workspace_size})); + mBackend->onAcquireBuffer(workspaceTensor.get(), Backend::STATIC); + mWorkspace = (void *)workspaceTensor.get()->buffer().device; + } + // Check the problem size is supported or not + cutlass::Status status = mGemmBatchedF32F32LnAlign8RCSm75.can_implement(arguments); + cutlass_check(status); + + // Initialize CUTLASS kernel with arguments and workspace pointer + status = mGemmBatchedF32F32LnAlign8RCSm75.initialize(arguments, (uint8_t *)mWorkspace); + cutlass_check(status); } - // Check the problem size is supported or not - cutlass::Status status = mGemmBatchedF32F32LnAlign1RCSm75.can_implement(arguments); - cutlass_check(status); + } + } else { + if(mNeedConvertMatAB) { + if(mConvertGemmSplitK) { + int split_k_slices = 16; + typename GemmTensor_F16_F32_Linear_AlignCuda_Sm75::Arguments arguments{problem_size, // <- problem size of matrix multiplication + {(ElementInput_F16 *)mTempMatA, mGemmInfo.elhPad[1]}, // Ptr + ldm + {(ElementInput_F16 *)mTempMatB, mGemmInfo.elhPad[1]}, // Ptr + ldm + {(ElementOutput_F32 *)mBiasPtr, 0}, // Ptr + ldm if ldm = 0, vector, + {(ElementOutput_F32 *)C->deviceId(), mGemmInfo.elh[2]}, // Ptr + ldm + {alpha, beta}, // <- tuple of alpha and beta + split_k_slices}; // <- k-dimension split factor + size_t workspace_size = GemmTensor_F16_F32_Linear_AlignCuda_Sm75::get_workspace_size(arguments); + + if(workspace_size != 0) { + workspaceTensor.reset(Tensor::createDevice({(int)workspace_size})); + mBackend->onAcquireBuffer(workspaceTensor.get(), Backend::STATIC); + mWorkspace = (void *)workspaceTensor.get()->buffer().device; + } + + cutlass::Status status = mGemmF16F32LnAlign1Sm75.can_implement(arguments); + cutlass_check(status); + + // Initialize CUTLASS kernel with arguments and workspace pointer + status = mGemmF16F32LnAlign1Sm75.initialize(arguments, (uint8_t *)mWorkspace); + cutlass_check(status); + } else { + typename GemmBatchedTensor_F16_F32_Linear_AlignCuda_Row_Column_Sm75::Arguments arguments{problem_size, // <- problem size of matrix multiplication + {(ElementInput_F16 *)mTempMatA, mGemmInfo.elhPad[1]}, // Ptr + ldm + (int64_t)(mGemmInfo.elh[0] * mGemmInfo.elhPad[1]* mAs), // batch_stride_A + {(ElementInput_F16 *)mTempMatB, mGemmInfo.elhPad[1]}, // Ptr + ldm + (int64_t)(mGemmInfo.elhPad[1] * mGemmInfo.elh[2]* mBs), // batch_stride_B + {(ElementOutput_F32 *)mBiasPtr, 0}, // Ptr + ldm if ldm = 0, vector, + (int64_t)(0), // batch_stride_bias + {(ElementOutput_F32 *)C->deviceId(), mGemmInfo.elh[2]}, // Ptr + ldm + (int64_t)(mGemmInfo.elh[0] * mGemmInfo.elh[2]), // batch_stride_C + {alpha, beta}, // <- tuple of alpha and beta + mBatch}; // batch_count - // Initialize CUTLASS kernel with arguments and workspace pointer - status = mGemmBatchedF32F32LnAlign1RCSm75.initialize(arguments, (uint8_t *)mWorkspace); - cutlass_check(status); + size_t workspace_size = GemmBatchedTensor_F16_F32_Linear_AlignCuda_Row_Column_Sm75::get_workspace_size(arguments); + + if(workspace_size != 0) { + workspaceTensor.reset(Tensor::createDevice({(int)workspace_size})); + mBackend->onAcquireBuffer(workspaceTensor.get(), Backend::STATIC); + mWorkspace = (void *)workspaceTensor.get()->buffer().device; + } + // Check the problem size is supported or not + cutlass::Status status = mGemmBatchedF16F32LnAlign1RCSm75.can_implement(arguments); + cutlass_check(status); + + // Initialize CUTLASS kernel with arguments and workspace pointer + status = mGemmBatchedF16F32LnAlign1RCSm75.initialize(arguments, (uint8_t *)mWorkspace); + cutlass_check(status); + } + } else { + if(mConvertGemmSplitK) { + int split_k_slices = 16; + typename GemmTensor_F32_F32_Linear_AlignCuda_Sm75::Arguments arguments{problem_size, // <- problem size of matrix multiplication + {(ElementInput_F32 *)mTempMatA, mGemmInfo.elhPad[1]}, // Ptr + ldm + {(ElementInput_F32 *)mTempMatB, mGemmInfo.elhPad[1]}, // Ptr + ldm + {(ElementOutput_F32 *)mBiasPtr, 0}, // Ptr + ldm if ldm = 0, vector, + {(ElementOutput_F32 *)C->deviceId(), mGemmInfo.elh[2]}, // Ptr + ldm + {alpha, beta}, // <- tuple of alpha and beta + split_k_slices}; // <- k-dimension split factor + size_t workspace_size = GemmTensor_F32_F32_Linear_AlignCuda_Sm75::get_workspace_size(arguments); + + if(workspace_size != 0) { + workspaceTensor.reset(Tensor::createDevice({(int)workspace_size})); + mBackend->onAcquireBuffer(workspaceTensor.get(), Backend::STATIC); + mWorkspace = (void *)workspaceTensor.get()->buffer().device; + } + + cutlass::Status status = mGemmF32F32LnAlign1Sm75.can_implement(arguments); + cutlass_check(status); + + // Initialize CUTLASS kernel with arguments and workspace pointer + status = mGemmF32F32LnAlign1Sm75.initialize(arguments, (uint8_t *)mWorkspace); + cutlass_check(status); + } else { + typename GemmBatchedTensor_F32_F32_Linear_AlignCuda_Row_Column_Sm75::Arguments arguments{problem_size, // <- problem size of matrix multiplication + {(ElementInput_F32 *)mTempMatA, mGemmInfo.elhPad[1]}, // Ptr + ldm + (int64_t)(mGemmInfo.elh[0] * mGemmInfo.elhPad[1]* mAs), // batch_stride_A + {(ElementInput_F32 *)mTempMatB, mGemmInfo.elhPad[1]}, // Ptr + ldm + (int64_t)(mGemmInfo.elhPad[1] * mGemmInfo.elh[2]* mBs), // batch_stride_B + {(ElementOutput_F32 *)mBiasPtr, 0}, // Ptr + ldm if ldm = 0, vector, + (int64_t)(0), // batch_stride_bias + {(ElementOutput_F32 *)C->deviceId(), mGemmInfo.elh[2]}, // Ptr + ldm + (int64_t)(mGemmInfo.elh[0] * mGemmInfo.elh[2]), // batch_stride_C + {alpha, beta}, // <- tuple of alpha and beta + mBatch}; // batch_count + + size_t workspace_size = GemmBatchedTensor_F32_F32_Linear_AlignCuda_Row_Column_Sm75::get_workspace_size(arguments); + + if(workspace_size != 0) { + workspaceTensor.reset(Tensor::createDevice({(int)workspace_size})); + mBackend->onAcquireBuffer(workspaceTensor.get(), Backend::STATIC); + mWorkspace = (void *)workspaceTensor.get()->buffer().device; + } + // Check the problem size is supported or not + cutlass::Status status = mGemmBatchedF32F32LnAlign1RCSm75.can_implement(arguments); + cutlass_check(status); + + // Initialize CUTLASS kernel with arguments and workspace pointer + status = mGemmBatchedF32F32LnAlign1RCSm75.initialize(arguments, (uint8_t *)mWorkspace); + cutlass_check(status); + } } } } @@ -695,7 +845,7 @@ ErrorCode MatMulExecution::onResize(const std::vector &inputs, const s mNeedBTempBuffer = (needBTranspose || !lAlignment) || mFp16Fp32MixInfer; mNeedConvertMatAB = (mNeedATempBuffer || mNeedBTempBuffer); - //MNN_PRINT("trAtrB:%d-%d, tmpAB:%d-%d inps:%d, bwlh:%d-%d-%d-%d\n", mTransposeA, mTransposeB, mNeedATempBuffer, mNeedBTempBuffer, inputs.size(), mBatch, mGemmInfo.elh[0], mGemmInfo.elh[1], mGemmInfo.elh[2]); + // MNN_PRINT("trAtrB:%d-%d, tmpAB:%d-%d inps:%d, bwlh:%d-%d-%d-%d\n", mTransposeA, mTransposeB, mNeedATempBuffer, mNeedBTempBuffer, inputs.size(), mBatch, mGemmInfo.elh[0], mGemmInfo.elh[1], mGemmInfo.elh[2]); auto pool = static_cast(backend())->getBufferPool(); std::pair bufferAData, bufferBData; @@ -730,6 +880,7 @@ ErrorCode MatMulExecution::onResize(const std::vector &inputs, const s } //printf("MatMulAB:%p-%p-%p-%p\n", A->host(), A->deviceId(), B->host(), B->deviceId()); + mConvertGemmSplitK = ((mBatch == 1) && (mGemmInfo.elhPad[1] >= 16384)); // Set Cutlass Param Arguments mResizeSetArgument = (mTempMatA != nullptr && mTempMatB != nullptr && C->deviceId() != 0); if(mResizeSetArgument) { @@ -855,19 +1006,39 @@ ErrorCode MatMulExecution::onExecute(const std::vector &inputs, const } else { if(hAlignment) { if(mNeedConvertMatAB) { - cutlass::Status status = mGemmBatchedF16F32LnAlign8RCSm75(); - cutlass_check(status); + if(mConvertGemmSplitK) { + cutlass::Status status = mGemmF16F32LnAlign8Sm75(); + cutlass_check(status); + } else { + cutlass::Status status = mGemmBatchedF16F32LnAlign8RCSm75(); + cutlass_check(status); + } } else { - cutlass::Status status = mGemmBatchedF32F32LnAlign8RCSm75(); - cutlass_check(status); + if(mConvertGemmSplitK) { + cutlass::Status status = mGemmF32F32LnAlign8Sm75(); + cutlass_check(status); + } else { + cutlass::Status status = mGemmBatchedF32F32LnAlign8RCSm75(); + cutlass_check(status); + } } } else { if(mNeedConvertMatAB) { - cutlass::Status status = mGemmBatchedF16F32LnAlign1RCSm75(); - cutlass_check(status); + if(mConvertGemmSplitK) { + cutlass::Status status = mGemmF16F32LnAlign1Sm75(); + cutlass_check(status); + } else { + cutlass::Status status = mGemmBatchedF16F32LnAlign1RCSm75(); + cutlass_check(status); + } } else { - cutlass::Status status = mGemmBatchedF32F32LnAlign1RCSm75(); - cutlass_check(status); + if(mConvertGemmSplitK) { + cutlass::Status status = mGemmF32F32LnAlign1Sm75(); + cutlass_check(status); + } else { + cutlass::Status status = mGemmBatchedF32F32LnAlign1RCSm75(); + cutlass_check(status); + } } } } @@ -878,15 +1049,25 @@ ErrorCode MatMulExecution::onExecute(const std::vector &inputs, const cutlass_check(status); } else { if(hAlignment) { - cutlass::Status status = mGemmBatchedF16F16LnAlign8RCSm75(); - cutlass_check(status); + if(mConvertGemmSplitK) { + cutlass::Status status = mGemmF16F16LnAlign8Sm75(); + cutlass_check(status); + } else { + cutlass::Status status = mGemmBatchedF16F16LnAlign8RCSm75(); + cutlass_check(status); + } } else { - cutlass::Status status = mGemmBatchedF16F16LnAlign1RCSm75(); - cutlass_check(status); + if(mConvertGemmSplitK) { + cutlass::Status status = mGemmF16F16LnAlign1Sm75(); + cutlass_check(status); + } else { + cutlass::Status status = mGemmBatchedF16F16LnAlign1RCSm75(); + cutlass_check(status); + } } } } - + // printf("normal:%d rrlayout:%d convertab:%d halign:%d\n", mFp16Fp32MixInfer, mUseRRLayout, mNeedConvertMatAB, hAlignment); return NO_ERROR; } diff --git a/source/backend/cuda/execution/MatMulExecution.hpp b/source/backend/cuda/execution/MatMulExecution.hpp index 4dcac2ed..ebd3fcb2 100644 --- a/source/backend/cuda/execution/MatMulExecution.hpp +++ b/source/backend/cuda/execution/MatMulExecution.hpp @@ -12,6 +12,7 @@ #include "backend/cuda/core/CUDABackend.hpp" #include "MNNCUDADefine.hpp" #include "CutlassGemmBatchedParam.hpp" +#include "CutlassGemmParam.hpp" #include "MNNCUDAFunction.cuh" namespace MNN { @@ -34,12 +35,18 @@ private: std::shared_ptr mBiasTensor; GemmBatchedTensor_F16_F16_Linear_AlignCuda_Row_Column_Sm75 mGemmBatchedF16F16LnAlign1RCSm75; + GemmTensor_F16_F16_Linear_AlignCuda_Sm75 mGemmF16F16LnAlign1Sm75; GemmBatchedTensor_F32_F32_Linear_AlignCuda_Row_Column_Sm75 mGemmBatchedF32F32LnAlign1RCSm75; + GemmTensor_F32_F32_Linear_AlignCuda_Sm75 mGemmF32F32LnAlign1Sm75; GemmBatchedTensor_F16_F32_Linear_AlignCuda_Row_Column_Sm75 mGemmBatchedF16F32LnAlign1RCSm75; + GemmTensor_F16_F32_Linear_AlignCuda_Sm75 mGemmF16F32LnAlign1Sm75; GemmBatchedTensor_F16_F16_Linear_AlignTensor_Row_Column_Sm75 mGemmBatchedF16F16LnAlign8RCSm75; + GemmTensor_F16_F16_Linear_AlignTensor_Sm75 mGemmF16F16LnAlign8Sm75; GemmBatchedTensor_F32_F32_Linear_AlignTensor_Row_Column_Sm75 mGemmBatchedF32F32LnAlign8RCSm75; + GemmTensor_F32_F32_Linear_AlignTensor_Sm75 mGemmF32F32LnAlign8Sm75; GemmBatchedTensor_F16_F32_Linear_AlignTensor_Row_Column_Sm75 mGemmBatchedF16F32LnAlign8RCSm75; + GemmTensor_F16_F32_Linear_AlignTensor_Sm75 mGemmF16F32LnAlign8Sm75; GemmBatchedTensor_F16_F16_Linear_AlignTensor_Row_Row_Sm75 mGemmBatchedF16F16LnAlign8RRSm75; GemmBatchedTensor_F32_F32_Linear_AlignTensor_Row_Row_Sm75 mGemmBatchedF32F32LnAlign8RRSm75; @@ -69,6 +76,7 @@ private: bool mFp16Infer = false; bool mFp32Infer = false; bool mFp16Fp32MixInfer = false; + bool mConvertGemmSplitK = false; }; } // namespace CUDA } // namespace MNN diff --git a/source/backend/cuda/execution/Raster.cu b/source/backend/cuda/execution/Raster.cu index c94c371a..dfa77e58 100644 --- a/source/backend/cuda/execution/Raster.cu +++ b/source/backend/cuda/execution/Raster.cu @@ -190,7 +190,7 @@ void RasterBlit(uint8_t* output, const uint8_t* input, const int32_t* size, cons DivModFast sy(size[1]); DivModFast sx(size[2]); - //printf("%d-%d-%d, %d-%d-%d,-%d-%d-%d\n", size[0], size[1], size[2], srcStride[0], srcStride[1], srcStride[2], dstStride[0], dstStride[1], dstStride[2]); + // MNN_PRINT("blit info size:%d-%d-%d, srcStride:%d-%d-%d, dstStride:%d-%d-%d\n", size[0], size[1], size[2], srcStride[0], srcStride[1], srcStride[2], dstStride[0], dstStride[1], dstStride[2]); if(bytes == 4 && count > 16384 && size[2] % 2 == 0 && srcStride[2] == 1 && dstStride[2] == 1) { //printf("%d-%d-%d, %d-%d-%d,-%d-%d-%d\n\n", size[0], size[1], size[2], srcStride[0], srcStride[1], srcStride[2], dstStride[0], dstStride[1], dstStride[2]); count /= 2; diff --git a/source/backend/cuda/execution/RasterExecution.cpp b/source/backend/cuda/execution/RasterExecution.cpp index f63534cb..87963c18 100644 --- a/source/backend/cuda/execution/RasterExecution.cpp +++ b/source/backend/cuda/execution/RasterExecution.cpp @@ -168,7 +168,18 @@ static bool _equalSizeStride(const Tensor::InsideDescribe::Region& slice0, const return true; } -static bool _directBlitC4(const Tensor::InsideDescribe::Region& slice0, const Tensor::InsideDescribe::Region& slice1) { +static bool _directBlitC4(const Tensor::InsideDescribe::Region& slice0, const Tensor::InsideDescribe::Region& slice1, Tensor* tensor) { + if(tensor->dimensions() < 2) { + return false; + } + if(slice0.src.stride[1] == tensor->width() && slice0.src.stride[0] == tensor->width() * tensor->height()) { + // area pack for fast blit only + return false; + } + if(slice1.src.stride[1] == tensor->width() && slice1.src.stride[0] == tensor->width() * tensor->height()) { + // area pack for fast blit only + return false; + } if(slice0.size[1] % PACK_NUMBER != 0 || slice0.size[0] != 1) { return false; } @@ -242,7 +253,7 @@ ErrorCode RasterExecution::onResize(const std::vector &____inputs, con mFast = false; break; } - if(!_directBlitC4(slice0, slice)) { + if(!_directBlitC4(slice0, slice, output)) { mFast = false; break; } diff --git a/source/backend/cuda/execution/ReductionExecution.cu b/source/backend/cuda/execution/ReductionExecution.cu index bbaa58fc..af5f1714 100755 --- a/source/backend/cuda/execution/ReductionExecution.cu +++ b/source/backend/cuda/execution/ReductionExecution.cu @@ -2,15 +2,86 @@ namespace MNN { namespace CUDA { +template +static void callSumFunc(const T* input, T* output, ReduceParam* param, CUDARuntime* runtime) { + int inside = param->inside; + int outside = param->outside; + int axis = param->axis; + int count = outside * inside; + + if(axis % 256 == 0 || axis >= 768) { + int calc_multi_num = (axis + 255) / 256; + SUM_REDUCE_AXIS<<>>(input, output, outside, axis, inside, 256, calc_multi_num); + checkKernelErrors; + } else if(axis >= 32) { + int calc_multi_num = (axis + 63) / 64; + SUM_REDUCE_AXIS<<>>(input, output, outside, axis, inside, 64, calc_multi_num); + checkKernelErrors; + } else { + int block_num = runtime->blocks_num(count); + int threads_num = runtime->threads_num(); + SUM_NAIVE<<>>(input, output, outside, axis, inside); + checkKernelErrors; + } +} + +template +static void callMeanFunc(const T* input, T* output, ReduceParam* param, CUDARuntime* runtime) { + int inside = param->inside; + int outside = param->outside; + int axis = param->axis; + int count = outside * inside; + + int block_num = runtime->blocks_num(count); + int threads_num = runtime->threads_num(); + MEAN<<>>(input, output, outside, axis, inside); + checkKernelErrors; +} + +template +static void callMaxFunc(const T* input, T* output, ReduceParam* param, CUDARuntime* runtime) { + int inside = param->inside; + int outside = param->outside; + int axis = param->axis; + int count = outside * inside; + + int block_num = runtime->blocks_num(count); + int threads_num = runtime->threads_num(); + MAXIMUM<<>>(input, output, outside, axis, inside); + checkKernelErrors; +} + +template +static void callMinFunc(const T* input, T* output, ReduceParam* param, CUDARuntime* runtime) { + int inside = param->inside; + int outside = param->outside; + int axis = param->axis; + int count = outside * inside; + + int block_num = runtime->blocks_num(count); + int threads_num = runtime->threads_num(); + MINIMUM<<>>(input, output, outside, axis, inside); + checkKernelErrors; +} + +template +static void callProdFunc(const T* input, T* output, ReduceParam* param, CUDARuntime* runtime) { + int inside = param->inside; + int outside = param->outside; + int axis = param->axis; + int count = outside * inside; + + int block_num = runtime->blocks_num(count); + int threads_num = runtime->threads_num(); + PROD<<>>(input, output, outside, axis, inside); + checkKernelErrors; +} + ReductionExecution::ReductionExecution(ReductionType opType, int axis, Backend *backend) : Execution(backend) { mType = opType; mAxis = axis; - auto staticPool = static_cast(backend)->getStaticBufferPool(); - mParam = staticPool->alloc(sizeof(ReduceParam)); } ReductionExecution::~ ReductionExecution() { - auto staticPool = static_cast(backend())->getStaticBufferPool(); - staticPool->free(mParam); } ErrorCode ReductionExecution::onResize(const std::vector &inputs, const std::vector &outputs) { @@ -27,9 +98,7 @@ ErrorCode ReductionExecution::onResize(const std::vector &inputs, cons mCpuParam.inside = inside; mCpuParam.outside = outside; mCpuParam.axis = axis; - cuda_check(cudaMemcpy((uint8_t*)mParam.first + mParam.second, &mCpuParam, sizeof(ReduceParam), cudaMemcpyHostToDevice)); - - //MNN_PRINT("Reduction axis_idx:%d, outside:%d, axis:%d, inside:%d\n", mAxis, outside, axis, inside); + // MNN_PRINT("Reduction axis_idx:%d, outside:%d, axis:%d, inside:%d\n", mAxis, outside, axis, inside); return NO_ERROR; } @@ -37,47 +106,46 @@ ErrorCode ReductionExecution::onExecute(const std::vector &inputs, con auto input = (void*)inputs[0]->deviceId(); auto output = (void*)outputs[0]->deviceId(); auto runtime = static_cast(backend())->getCUDARuntime(); - int inside = mCpuParam.inside;; + int inside = mCpuParam.inside; int outside = mCpuParam.outside; int count = inside * outside; int block_num = runtime->blocks_num(count); int threads_num = runtime->threads_num(); - auto param = (ReduceParam*)((uint8_t*)mParam.first + mParam.second); if (inputs[0]->getType() == halide_type_of()) { if (static_cast(backend())->useFp16()) { switch (mType) { case ReductionType_MEAN: - MEAN<<>>((const half*)input, (half*)output, param); + callMeanFunc((const half*)input, (half*)output, &mCpuParam, runtime); return NO_ERROR; case ReductionType_SUM: - SUM<<>>((const half*)input, (half*)output, param); + callSumFunc((const half*)input, (half*)output, &mCpuParam, runtime); return NO_ERROR; case ReductionType_MINIMUM: - MINIMUM<<>>((const half*)input, (half*)output, param); + callMinFunc((const half*)input, (half*)output, &mCpuParam, runtime); return NO_ERROR; case ReductionType_MAXIMUM: - MAXIMUM<<>>((const half*)input, (half*)output, param); + callMaxFunc((const half*)input, (half*)output, &mCpuParam, runtime); return NO_ERROR; case ReductionType_PROD: - PROD<<>>((const half*)input, (half*)output, param); + callProdFunc((const half*)input, (half*)output, &mCpuParam, runtime); return NO_ERROR; } } else { switch (mType) { case ReductionType_MEAN: - MEAN<<>>((const float*)input, (float*)output, param); + callMeanFunc((const float*)input, (float*)output, &mCpuParam, runtime); return NO_ERROR; case ReductionType_SUM: - SUM<<>>((const float*)input, (float*)output, param); + callSumFunc((const float*)input, (float*)output, &mCpuParam, runtime); return NO_ERROR; case ReductionType_MINIMUM: - MINIMUM<<>>((const float*)input, (float*)output, param); + callMinFunc((const float*)input, (float*)output, &mCpuParam, runtime); return NO_ERROR; case ReductionType_MAXIMUM: - MAXIMUM<<>>((const float*)input, (float*)output, param); + callMaxFunc((const float*)input, (float*)output, &mCpuParam, runtime); return NO_ERROR; case ReductionType_PROD: - PROD<<>>((const float*)input, (float*)output, param); + callProdFunc((const float*)input, (float*)output, &mCpuParam, runtime); return NO_ERROR; } } @@ -88,25 +156,26 @@ ErrorCode ReductionExecution::onExecute(const std::vector &inputs, con MNN_ASSERT(inputs[0]->getType() == halide_type_of()); switch (mType) { case ReductionType_MEAN: - MEAN<<>>((const int32_t*)input, (int32_t*)output, param); + callMeanFunc((const int32_t*)input, (int32_t*)output, &mCpuParam, runtime); return NO_ERROR; case ReductionType_SUM: - SUM<<>>((const int32_t*)input, (int32_t*)output, param); + callSumFunc((const int32_t*)input, (int32_t*)output, &mCpuParam, runtime); + // SUM<<>>((const int32_t*)input, (int32_t*)output, param); return NO_ERROR; case ReductionType_MINIMUM: - MINIMUM<<>>((const int32_t*)input, (int32_t*)output, param); + callMinFunc((const int32_t*)input, (int32_t*)output, &mCpuParam, runtime); return NO_ERROR; case ReductionType_MAXIMUM: - MAXIMUM<<>>((const int32_t*)input, (int32_t*)output, param); + callMaxFunc((const int32_t*)input, (int32_t*)output, &mCpuParam, runtime); return NO_ERROR; case ReductionType_PROD: - PROD<<>>((const int32_t*)input, (int32_t*)output, param); + callProdFunc((const int32_t*)input, (int32_t*)output, &mCpuParam, runtime); return NO_ERROR; case ReductionType_ANY: - MAXIMUM<<>>((const int32_t*)input, (int32_t*)output, param); + callMaxFunc((const int32_t*)input, (int32_t*)output, &mCpuParam, runtime); return NO_ERROR; case ReductionType_ALL: - MINIMUM<<>>((const int32_t*)input, (int32_t*)output, param); + callMinFunc((const int32_t*)input, (int32_t*)output, &mCpuParam, runtime); return NO_ERROR; } MNN_ASSERT(false); diff --git a/source/backend/cuda/execution/ReductionExecution.hpp b/source/backend/cuda/execution/ReductionExecution.hpp index a281e9ee..8a943efd 100644 --- a/source/backend/cuda/execution/ReductionExecution.hpp +++ b/source/backend/cuda/execution/ReductionExecution.hpp @@ -25,7 +25,6 @@ private: ReductionType mType; int mAxis; ReduceParam mCpuParam; - std::pair mParam; }; } // namespace CUDA } // namespace MNN diff --git a/source/backend/cuda/execution/ReductionTemplate.cuh b/source/backend/cuda/execution/ReductionTemplate.cuh index 3586e83c..2dc0b7cf 100644 --- a/source/backend/cuda/execution/ReductionTemplate.cuh +++ b/source/backend/cuda/execution/ReductionTemplate.cuh @@ -1,91 +1,143 @@ #ifndef ReductionTemplate_cuh #define ReductionTemplate_cuh + +#include "MNNCUDAFunction.cuh" struct ReduceParam { int inside; int axis; int outside; }; template -__global__ void SUM(const T *input, T *output, const ReduceParam* param) { - int count = param->inside * param->outside; +__global__ void SUM_NAIVE(const T *input, T *output, + const int outside, + const int axis, + const int inside +) { + int count = inside * outside; for (size_t i = blockIdx.x * blockDim.x + threadIdx.x; i < (count); i += blockDim.x * gridDim.x) { - int y = i / param->inside; - int x = i % param->inside; + int y = i / inside; + int x = i % inside; float sumValue = 0.0; - int axis = param->axis; - const T* basicInput = input + y * param->axis * param->inside + x; + const T* basicInput = input + y * axis * inside + x; for (int v=0; vinside]; + sumValue += (float)basicInput[v * inside]; } - output[y * param->inside + x] = (T)sumValue; + output[y * inside + x] = (T)sumValue; } return; } template -__global__ void MEAN(const T *input, T *output, const ReduceParam* param) { - int count = param->inside * param->outside; - for (size_t i = blockIdx.x * blockDim.x + threadIdx.x; i < (count); i += blockDim.x * gridDim.x) { - int y = i / param->inside; - int x = i % param->inside; - float sumValue = 0.0; - int axis = param->axis; - const T* basicInput = input + y * param->axis * param->inside + x; - for (int v=0; vinside]; +__global__ void SUM_REDUCE_AXIS(const T *input, T *output, + const int outside, + const int axis, + const int inside, + const int per_block_size, + const int calc_multi_num +) { + int idx_outside = blockIdx.x / inside; + int idx_inside = blockIdx.x - idx_outside * inside; + + const T* src = input + idx_outside * axis * inside + idx_inside; + int tid = threadIdx.x; + + float local_src = 0.0; + __shared__ float sumValue; + for(int i=0; iinside + x] = (T)(sumValue / (float)param->axis); + } + float maxRes = blockReduceSum(local_src); + if(tid == 0) + sumValue = maxRes; + __syncthreads(); + + output[idx_outside * inside + idx_inside] = (T)sumValue; + return; +} + + +template +__global__ void MEAN(const T *input, T *output, + const int outside, + const int axis, + const int inside +) { + int count = inside * outside; + for (size_t i = blockIdx.x * blockDim.x + threadIdx.x; i < (count); i += blockDim.x * gridDim.x) { + int y = i / inside; + int x = i % inside; + float sumValue = 0.0; + + const T* basicInput = input + y * axis * inside + x; + for (int v=0; v -__global__ void MINIMUM(const T *input, T *output, const ReduceParam* param) { - int count = param->inside * param->outside; +__global__ void MINIMUM(const T *input, T *output, + const int outside, + const int axis, + const int inside +) { + int count = inside * outside; for (size_t i = blockIdx.x * blockDim.x + threadIdx.x; i < (count); i += blockDim.x * gridDim.x) { - int y = i / param->inside; - int x = i % param->inside; - int axis = param->axis; - const T* basicInput = input + y * param->axis * param->inside + x; + int y = i / inside; + int x = i % inside; + + const T* basicInput = input + y * axis * inside + x; float res = (float)basicInput[0]; for (int v=1; vinside], res); + res = min((float)basicInput[v * inside], res); } - output[y * param->inside + x] = (T)res; + output[y * inside + x] = (T)res; } return; } template -__global__ void MAXIMUM(const T *input, T *output, const ReduceParam* param) { - int count = param->inside * param->outside; +__global__ void MAXIMUM(const T *input, T *output, + const int outside, + const int axis, + const int inside +) { + int count = inside * outside; for (size_t i = blockIdx.x * blockDim.x + threadIdx.x; i < (count); i += blockDim.x * gridDim.x) { - int y = i / param->inside; - int x = i % param->inside; - const T* basicInput = input + y * param->axis * param->inside + x; - int axis = param->axis; + int y = i / inside; + int x = i % inside; + const T* basicInput = input + y * axis * inside + x; + float res = (float)basicInput[0]; for (int v=1; vinside], res); + res = max((float)basicInput[v * inside], res); } - output[y * param->inside + x] = (T)res; + output[y * inside + x] = (T)res; } return; } template -__global__ void PROD(const T *input, T *output, const ReduceParam* param) { - int count = param->inside * param->outside; +__global__ void PROD(const T *input, T *output, + const int outside, + const int axis, + const int inside +) { + int count = inside * outside; for (size_t i = blockIdx.x * blockDim.x + threadIdx.x; i < (count); i += blockDim.x * gridDim.x) { - int y = i / param->inside; - int x = i % param->inside; - int axis = param->axis; + int y = i / inside; + int x = i % inside; + float sumValue = 1.0; - const T* basicInput = input + y * param->axis * param->inside + x; + const T* basicInput = input + y * axis * inside + x; for (int v=0; vinside]; + sumValue *= (float)basicInput[v * inside]; } - output[y * param->inside + x] = (T)sumValue; + output[y * inside + x] = (T)sumValue; } return; } diff --git a/source/backend/cuda/execution/SoftmaxExecution.cu b/source/backend/cuda/execution/SoftmaxExecution.cu index cd774bad..519f53e0 100644 --- a/source/backend/cuda/execution/SoftmaxExecution.cu +++ b/source/backend/cuda/execution/SoftmaxExecution.cu @@ -30,62 +30,6 @@ __global__ void SOFTMAX(const T *input, T *output, } } -template -__inline__ __device__ -T warpReduceSum(T val) -{ - for(int mask = 16; mask > 0; mask >>= 1) - val += __shfl_xor_sync(0xffffffff, val, mask, 32); - return val; -} - -template -__inline__ __device__ -T warpReduceMax(T val) -{ - for(int mask = 16; mask > 0; mask >>= 1) - val = max(val, __shfl_xor_sync(0xffffffff, val, mask, 32)); - return val; -} - -template -__inline__ __device__ -T blockReduceSum(T val) -{ - static __shared__ T shared[32]; - int lane = threadIdx.x & 0x1f; - int wid = threadIdx.x >> 5; - - val = warpReduceSum(val); - - if(lane == 0) - shared[wid] = val; - __syncthreads(); - - val = (threadIdx.x < (blockDim.x >> 5 )) ? shared[lane] : (T)0.0f; - val = warpReduceSum(val); - return val; -} - -template -__inline__ __device__ -T blockReduceMax(T val) -{ - static __shared__ T shared[32]; - int lane = threadIdx.x & 0x1f; - int wid = threadIdx.x >> 5; - - val = warpReduceMax(val); - - if(lane == 0) - shared[wid] = val; - __syncthreads(); - - val = (threadIdx.x < (blockDim.x >> 5 )) ? shared[lane] : (T)0.0f; - val = warpReduceMax(val); - return val; -} - template __global__ void SOFTMAX_WARP_32(const T *input, T *output, const int inside, diff --git a/source/backend/cuda/execution/SoftmaxExecution.hpp b/source/backend/cuda/execution/SoftmaxExecution.hpp index d25bf0a5..47cb300c 100644 --- a/source/backend/cuda/execution/SoftmaxExecution.hpp +++ b/source/backend/cuda/execution/SoftmaxExecution.hpp @@ -11,6 +11,7 @@ #include #include "ReductionTemplate.cuh" +#include "MNNCUDAFunction.cuh" #include "backend/cuda/core/CUDABackend.hpp" #include diff --git a/source/backend/cuda/execution/make_cutlass_param.py b/source/backend/cuda/execution/make_cutlass_param.py index 20838573..55964fc5 100644 --- a/source/backend/cuda/execution/make_cutlass_param.py +++ b/source/backend/cuda/execution/make_cutlass_param.py @@ -143,7 +143,11 @@ def generateGemmFile(headfile): hpp += out_align + out_precision_name + epilogue_name + ",\n " hpp += "SwizzleThreadBlock,\n " - hpp += "NumStages>;\n\n" + hpp += "NumStages" + if sm_name == "_Sm75": + hpp += ",\n 128 / cutlass::sizeof_bits<" + element_input_precision + ">::value, 128 / cutlass::sizeof_bits<" + element_input_precision + ">::value, true>;\n\n" + else : + hpp += ">;\n\n" hpp += "}\n}\n#endif" with open(headfile, "w") as f: diff --git a/source/backend/opencl/core/OpenCLBackend.cpp b/source/backend/opencl/core/OpenCLBackend.cpp index 6700f249..ce85fccc 100644 --- a/source/backend/opencl/core/OpenCLBackend.cpp +++ b/source/backend/opencl/core/OpenCLBackend.cpp @@ -428,20 +428,6 @@ Execution* OpenCLBackend::onCreate(const std::vector& inputs, const std valid = false; break; } - - //input in raster not used, origin instead - auto des = TensorUtils::getDescribe(t)->regions; - for(auto region : des) - { - auto tensor = region.origin; - auto tensorShape = OpenCL::tensorShapeFormat(tensor); - int originHeight = tensorShape[0] * tensorShape[1]; - int originWidth = tensorShape[2] * UP_DIV(tensorShape[3], 4); - if (originHeight > maxImageSize.at(0) || originWidth > maxImageSize.at(1)) { - valid = false; - break; - } - } } for (auto t : outputs) { auto tensorShape = OpenCL::tensorShapeFormat(t); diff --git a/source/backend/opencl/core/runtime/OpenCLRuntime.cpp b/source/backend/opencl/core/runtime/OpenCLRuntime.cpp index 484919b8..dea15074 100644 --- a/source/backend/opencl/core/runtime/OpenCLRuntime.cpp +++ b/source/backend/opencl/core/runtime/OpenCLRuntime.cpp @@ -123,15 +123,8 @@ OpenCLRuntime::OpenCLRuntime(const BackendConfig::PrecisionMode precision, const isSetWorkGroupAttribute = true; } else if (deviceVendor.find("Intel") != std::string::npos) { mGpuType = INTEL; - std::string opencl_c_version = mFirstGPUDevicePtr->getInfo(); - int version = 0; - for (auto s : opencl_c_version) { - if (s >= '0' && s <= '9') { - version += (s - '0'); - version *= 10; - } - } - if (version >= 120) { + const std::string extensions = mFirstGPUDevicePtr->getInfo(); + if (extensions.find("cl_intel_subgroups") != std::string::npos) { mSupportedIntelSubgroup = true; uint32_t execution_units_count = mFirstGPUDevicePtr->getInfo(); uint32_t num_threads_per_eu = mFirstGPUDevicePtr->getInfo(); diff --git a/source/backend/opencl/execution/buffer/BinaryBufExecution.cpp b/source/backend/opencl/execution/buffer/BinaryBufExecution.cpp index 9ea4a0aa..51d837a8 100644 --- a/source/backend/opencl/execution/buffer/BinaryBufExecution.cpp +++ b/source/backend/opencl/execution/buffer/BinaryBufExecution.cpp @@ -16,10 +16,8 @@ namespace MNN { namespace OpenCL { BinaryBufExecution::BinaryBufExecution(const std::vector &inputs, const std::string &compute, const MNN::Op *op, Backend *backend) - : CommonExecution(backend), mCompute(compute) { + : CommonExecution(backend, op), mCompute(compute) { mBuildOptions.emplace("-DOPERATOR=" + compute); - mOp = op; - mOpType = op->type(); } uint32_t BinaryBufExecution::realSize(const Tensor* tensor) { diff --git a/source/backend/opencl/execution/buffer/LoopBufExecution.cpp b/source/backend/opencl/execution/buffer/LoopBufExecution.cpp new file mode 100644 index 00000000..3c9fed9e --- /dev/null +++ b/source/backend/opencl/execution/buffer/LoopBufExecution.cpp @@ -0,0 +1,351 @@ +// +// LoopBufExecution.cpp +// MNN +// +// Created by MNN on 2019/02/28. +// Copyright © 2018, Alibaba Group Holding Limited +// + +#ifndef MNN_OPENCL_BUFFER_CLOSED + +#include "backend/opencl/execution/buffer/LoopBufExecution.hpp" +#include "core/Macro.h" +#include "core/TensorUtils.hpp" + +namespace MNN { +namespace OpenCL { + +static void _TileOrPackTensor(Tensor *input, Tensor *output, cl::Kernel& kernel, cl::NDRange &globalWorkSize, + cl::NDRange &localWorkSize, const int Width, const int Height, const int Channel, + const int Batch, OpenCLRuntime *runTime, const std::string &KernelName, const std::set &buildOptions) { + kernel = runTime->buildKernel("loop_buf", KernelName, buildOptions); + uint32_t mMaxWorkGroupSize = static_cast(runTime->getMaxWorkGroupSize(kernel)); + std::vector mGlobalWorkSize = {(uint32_t)(Width * Height), (uint32_t)(UP_DIV(Channel, 4)), (uint32_t)(Batch)}; + + uint32_t index = 0; + kernel.setArg(index++, mGlobalWorkSize[0]); + kernel.setArg(index++, mGlobalWorkSize[1]); + kernel.setArg(index++, mGlobalWorkSize[2]); + kernel.setArg(index++, openCLBuffer(input)); + kernel.setArg(index++, openCLBuffer(output)); + kernel.setArg(index++, Width); + kernel.setArg(index++, Height); + kernel.setArg(index++, Channel); + + std::vector mLocalWorkSize = localWS3DDefault(mGlobalWorkSize, mMaxWorkGroupSize, runTime, KernelName, kernel).first; + + globalWorkSize = {mGlobalWorkSize[0], mGlobalWorkSize[1], mGlobalWorkSize[2]}; + localWorkSize = {mLocalWorkSize[0], mLocalWorkSize[1], mLocalWorkSize[2]}; +} + +static void _setTensorStack(std::vector &result, const std::vector &inputs, + const std::vector &outputs, const LoopParam *loop) { + if (loop->inputIndexes() != nullptr) { + for (int i = 0; i < loop->inputIndexes()->size(); ++i) { + result[loop->inputIndexes()->data()[i]] = inputs[i]; + } + } + for (int i = 0; i < loop->outputIndexes()->size(); ++i) { + result[loop->outputIndexes()->data()[i]] = outputs[i]; + } +} + + + LoopGatherBufExecution::LoopGatherBufExecution(const LoopParam *loop, const MNN::Op *op, Backend *bn) + : CommonExecution(bn, op) { + mLoop = loop; + mTensors.resize(mLoop->tensorNumber()); + auto cmd = loop->commands()->GetAs(0); + } + ErrorCode LoopGatherBufExecution::onResize(const std::vector &inputs, const std::vector &outputs) { + auto cmd = mLoop->commands()->GetAs(0); + OpenCLBackend *mOpenCLBackend = (OpenCLBackend *)backend(); + auto runTime = mOpenCLBackend->getOpenCLRuntime(); + _setTensorStack(mTensors, inputs, outputs, mLoop); + mUnits.clear(); + mOffsetTensors.clear(); + mTmpTensors.resize(2); + int x = cmd->size()->data()[0]; + int y = cmd->size()->data()[1]; + int z = cmd->size()->data()[2]; + int n = mLoop->loopNumber(); + + auto srcStride = cmd->view()->GetAs(1)->stride()->data(); + auto dstStride = cmd->view()->GetAs(0)->stride()->data(); + for (int i = 0; i < 3; ++i) { + mStride_src[i] = srcStride[i]; + mStride_dst[i] = dstStride[i]; + } + + mStride_src[3] = cmd->view()->GetAs(1)->offset(); + mStride_dst[3] = cmd->view()->GetAs(0)->offset(); + ::memcpy(mStep, cmd->steps()->data(), cmd->steps()->size() * sizeof(int)); + ::memcpy(mIter, cmd->iterIndexes()->data(), cmd->iterIndexes()->size() * sizeof(int)); + + // tile input + { + auto input = mTensors[cmd->indexes()->data()[1]]; + std::vector Shape = tensorShapeFormat(input); + const int Channel = Shape.at(3); + const int Width = Shape.at(2); + const int Height = Shape.at(1); + const int Batch = Shape.at(0); + mTmpTensors[1] = std::make_shared(Tensor::createDevice(std::vector{Batch, Channel, Height, Width})); + mOpenCLBackend->onAcquireBuffer(mTmpTensors[1].get(), Backend::DYNAMIC); + + Unit unit; + _TileOrPackTensor(mTensors[cmd->indexes()->data()[1]], mTmpTensors[1].get(), unit.kernel, unit.globalWorkSize, unit.localWorkSize, Width, Height,Channel, Batch, runTime, "tile_buf", mBuildOptions); + mUnits.emplace_back(unit); + } + + for(int i = 0; i < cmd->iterIndexes()->size(); ++i){ + if (mIter[i] >= 0) { + auto input = mTensors[cmd->iterIndexes()->data()[i]]; + std::vector Shape = tensorShapeFormat(input); + const int Channel = Shape.at(3); + const int Width = Shape.at(2); + const int Height = Shape.at(1); + const int Batch = Shape.at(0); + mOffsetTensors.emplace_back(std::make_shared(Tensor::createDevice(std::vector{Batch, Channel, Height, Width}))); + mOpenCLBackend->onAcquireBuffer(mOffsetTensors.back().get(), Backend::DYNAMIC); + + Unit unit; + _TileOrPackTensor(input, mOffsetTensors.back().get(), unit.kernel, unit.globalWorkSize, unit.localWorkSize, Width, Height, Channel, Batch, runTime, "tile_buf", mBuildOptions); + mUnits.emplace_back(unit); + } + } + + // gather + { + mTmpTensors[0] = std::make_shared(Tensor::createDevice(std::vector{n, z, y, x})); + mOpenCLBackend->onAcquireBuffer(mTmpTensors[0].get(), Backend::DYNAMIC); + int offset_index = 0; + + Unit unit; + std::string KernelName = "batch_gather_buf"; + unit.kernel = runTime->buildKernel("loop_buf", KernelName, mBuildOptions); + uint32_t mMaxWorkGroupSize = static_cast(runTime->getMaxWorkGroupSize(unit.kernel)); + std::vector mGlobalWorkSize = {(uint32_t)(x * y), (uint32_t)(z), (uint32_t)(n)}; + + uint32_t index = 0; + unit.kernel.setArg(index++, mGlobalWorkSize[0]); + unit.kernel.setArg(index++, mGlobalWorkSize[1]); + unit.kernel.setArg(index++, mGlobalWorkSize[2]); + unit.kernel.setArg(index++, openCLBuffer(mTmpTensors[0].get())); + unit.kernel.setArg(index++, openCLBuffer(mTmpTensors[1].get())); + for (int i = 0; i < cmd->iterIndexes()->size(); ++i) { + if (mIter[i] >= 0) { + unit.kernel.setArg(index++, openCLBuffer(mOffsetTensors[offset_index++].get())); + } else { + unit.kernel.setArg(index++, openCLBuffer(mTensors[cmd->indexes()->data()[1]])); + } + } + unit.kernel.setArg(index++, x); + unit.kernel.setArg(index++, sizeof(mStride_src), mStride_src); + unit.kernel.setArg(index++, sizeof(mStride_dst), mStride_dst); + unit.kernel.setArg(index++, sizeof(mStep), mStep); + unit.kernel.setArg(index++, sizeof(mIter), mIter); + + std::vector mLocalWorkSize = localWS3DDefault(mGlobalWorkSize, mMaxWorkGroupSize, runTime, KernelName, unit.kernel).first; + + unit.globalWorkSize = {mGlobalWorkSize[0], mGlobalWorkSize[1], mGlobalWorkSize[2]}; + unit.localWorkSize = {mLocalWorkSize[0], mLocalWorkSize[1], mLocalWorkSize[2]}; + mUnits.emplace_back(unit); + } + + //pack output + { + auto output = mTensors[cmd->indexes()->data()[0]]; + std::vector Shape = tensorShapeFormat(output); + const int Channel = Shape.at(3); + const int Width = Shape.at(2); + const int Height = Shape.at(1); + const int Batch = Shape.at(0); + Unit unit; + _TileOrPackTensor(mTmpTensors[0].get(), mTensors[cmd->indexes()->data()[0]], unit.kernel, unit.globalWorkSize, unit.localWorkSize, Width, Height, Channel, Batch, runTime, "pack_buf", mBuildOptions); + mUnits.emplace_back(unit); + } + + for (int i = 0; i < mTmpTensors.size(); ++i) { + mOpenCLBackend->onReleaseBuffer(mTmpTensors[i].get(), Backend::DYNAMIC); + } + for (int i = 0; i < mOffsetTensors.size(); ++i) { + mOpenCLBackend->onReleaseBuffer(mOffsetTensors[i].get(), Backend::DYNAMIC); + } + + return NO_ERROR; + } + + +LoopBatchMatMulBufExecution::LoopBatchMatMulBufExecution(const LoopParam *loop, const MNN::Op *op, Backend *bn) + : CommonExecution(bn, op) { + mLoop = loop; + mTensors.resize(mLoop->tensorNumber()); + auto cmd = loop->commands()->GetAs(0); + mHasBias = cmd->indexes()->size() > 3; + mTransposeA = cmd->op()->main_as_MatMul()->transposeA(); + mTransposeB = cmd->op()->main_as_MatMul()->transposeB(); +} +ErrorCode LoopBatchMatMulBufExecution::onResize(const std::vector &inputs, const std::vector &outputs) { + auto cmd = mLoop->commands()->GetAs(0); + OpenCLBackend *mOpenCLBackend = (OpenCLBackend *)backend(); + auto runTime = mOpenCLBackend->getOpenCLRuntime(); + _setTensorStack(mTensors, inputs, outputs, mLoop); + + mOffset[0] = cmd->view()->GetAs(0)->offset(); + mOffset[1] = cmd->view()->GetAs(1)->offset(); + mOffset[2] = cmd->view()->GetAs(2)->offset(); + mUnits.clear(); + mOffsetTensors.clear(); + mTmpTensors.resize(3); + if (mHasBias) { + mTmpTensors.resize(4); + mOffset[3] = cmd->view()->GetAs(3)->offset(); + } + + ::memcpy(mStep, cmd->steps()->data(), cmd->steps()->size() * sizeof(int)); + ::memcpy(mIter, cmd->iterIndexes()->data(), cmd->iterIndexes()->size() * sizeof(int)); + int e = cmd->size()->data()[0]; + int l = cmd->size()->data()[1]; + int h = cmd->size()->data()[2]; + int n = mLoop->loopNumber(); + + // tile input + for (int i = 1; i < cmd->indexes()->size(); ++i) { + auto input = mTensors[cmd->indexes()->data()[i]]; + std::vector Shape = tensorShapeFormat(input); + const int Channel = Shape.at(3); + const int Width = Shape.at(2); + const int Height = Shape.at(1); + const int Batch = Shape.at(0); + mTmpTensors[i] = std::make_shared(Tensor::createDevice(std::vector{Batch, Channel, Height, Width})); + mOpenCLBackend->onAcquireBuffer(mTmpTensors[i].get(), Backend::DYNAMIC); + + Unit unit; + _TileOrPackTensor(input, mTmpTensors[i].get(), unit.kernel, unit.globalWorkSize, unit.localWorkSize, Width, Height, Channel, Batch, runTime, "tile_buf", mBuildOptions); + mUnits.emplace_back(unit); + } + + for(int i = 0; i < cmd->iterIndexes()->size(); ++i){ + if (mIter[i] >= 0) { + auto input = mTensors[cmd->iterIndexes()->data()[i]]; + std::vector Shape = tensorShapeFormat(input); + const int Channel = Shape.at(3); + const int Width = Shape.at(2); + const int Height = Shape.at(1); + const int Batch = Shape.at(0); + mOffsetTensors.emplace_back(std::make_shared(Tensor::createDevice(std::vector{Batch, Channel, Height, Width}))); + mOpenCLBackend->onAcquireBuffer(mOffsetTensors.back().get(), Backend::DYNAMIC); + + Unit unit; + _TileOrPackTensor(input, mOffsetTensors.back().get(), unit.kernel, unit.globalWorkSize, unit.localWorkSize, Width, Height, Channel, Batch, runTime, "tile_buf", mBuildOptions); + mUnits.emplace_back(unit); + } + } + + // matmul + { + mTmpTensors[0] = std::make_shared(Tensor::createDevice(std::vector{1, n, e, h})); + mOpenCLBackend->onAcquireBuffer(mTmpTensors[0].get(), Backend::DYNAMIC); + int offset_index = 0; + + Unit unit; + std::string KernelName = "batch_matmul_buf"; + if (mHasBias) { + mBuildOptions.emplace("-DBIAS"); + } + if (mTransposeA) { + mBuildOptions.emplace("-DTRANSPOSE_A"); + } + if (mTransposeB) { + mBuildOptions.emplace("-DTRANSPOSE_B"); + } + unit.kernel = runTime->buildKernel("loop_buf", KernelName, mBuildOptions); + uint32_t mMaxWorkGroupSize = static_cast(runTime->getMaxWorkGroupSize(unit.kernel)); + std::vector mGlobalWorkSize = {(uint32_t)(h), (uint32_t)(e),(uint32_t)(n)}; + + uint32_t index = 0; + unit.kernel.setArg(index++, mGlobalWorkSize[0]); + unit.kernel.setArg(index++, mGlobalWorkSize[1]); + unit.kernel.setArg(index++, mGlobalWorkSize[2]); + unit.kernel.setArg(index++, openCLBuffer(mTmpTensors[0].get())); + unit.kernel.setArg(index++, openCLBuffer(mTmpTensors[1].get())); + unit.kernel.setArg(index++, openCLBuffer(mTmpTensors[2].get())); + if (mHasBias) { + unit.kernel.setArg(index++, openCLBuffer(mTmpTensors[3].get())); + } + for (int i = 0; i < cmd->iterIndexes()->size(); ++i) { + if (mIter[i] >= 0) { + unit.kernel.setArg(index++, openCLBuffer(mOffsetTensors[offset_index++].get())); + } else { + unit.kernel.setArg(index++, openCLBuffer(mTensors[cmd->indexes()->data()[1]])); + } + } + unit.kernel.setArg(index++, e); + unit.kernel.setArg(index++, l); + unit.kernel.setArg(index++, h); + unit.kernel.setArg(index++, sizeof(mOffset), mOffset); + unit.kernel.setArg(index++, sizeof(mIter), mIter); + unit.kernel.setArg(index++, sizeof(mStep), mStep); + + std::vector mLocalWorkSize = localWS3DDefault(mGlobalWorkSize, mMaxWorkGroupSize, runTime, KernelName, unit.kernel).first; + + unit.globalWorkSize = {mGlobalWorkSize[0], mGlobalWorkSize[1], mGlobalWorkSize[2]}; + unit.localWorkSize = {mLocalWorkSize[0], mLocalWorkSize[1], mLocalWorkSize[2]}; + mUnits.emplace_back(unit); + } + + //pack output + { + auto output = mTensors[cmd->indexes()->data()[0]]; + std::vector Shape = tensorShapeFormat(output); + const int Channel = Shape.at(3); + const int Width = Shape.at(2); + const int Height = Shape.at(1); + const int Batch = Shape.at(0); + Unit unit; + _TileOrPackTensor(mTmpTensors[0].get(), output, unit.kernel, unit.globalWorkSize, unit.localWorkSize, Width, Height, Channel, Batch, runTime, "pack_buf", mBuildOptions); + mUnits.emplace_back(unit); + } + + for (int i = 0; i < cmd->indexes()->size(); ++i) { + mOpenCLBackend->onReleaseBuffer(mTmpTensors[i].get(), Backend::DYNAMIC); + } + for (int i = 0; i < mOffsetTensors.size(); ++i) { + mOpenCLBackend->onReleaseBuffer(mOffsetTensors[i].get(), Backend::DYNAMIC); + } + + return NO_ERROR; +} + +class LoopBufCreator : public OpenCLBackend::Creator { +public: + virtual Execution *onCreate(const std::vector &inputs, const std::vector &outputs, + const MNN::Op *op, Backend *backend) const override { + auto loop = op->main_as_LoopParam(); + if (nullptr == loop || loop->commands() == nullptr) { + return nullptr; + } + if (nullptr != loop->initCommand()) { + return nullptr; + } + // Make Tensor Stack + if (1 == loop->commands()->size()) { + auto cmd = loop->commands()->GetAs(0); + auto subop = cmd->op(); + if (OpType_UnaryOp == subop->type() && nullptr == subop->main() && cmd->fuse() < 0) { + return new LoopGatherBufExecution(loop, op, backend); + } + if (OpType_MatMul == subop->type() && loop->parallel()) { + return new LoopBatchMatMulBufExecution(loop, op, backend); + } + } + return nullptr; + } +}; + +OpenCLCreatorRegister __LoopBuf_op(OpType_While, BUFFER); + +} // namespace OpenCL +} // namespace MNN +#endif /* MNN_OPENCL_BUFFER_CLOSED */ diff --git a/source/backend/opencl/execution/buffer/LoopBufExecution.hpp b/source/backend/opencl/execution/buffer/LoopBufExecution.hpp new file mode 100644 index 00000000..baf6b068 --- /dev/null +++ b/source/backend/opencl/execution/buffer/LoopBufExecution.hpp @@ -0,0 +1,60 @@ +// +// LoopBufExecution.hpp +// MNN +// +// Created by MNN on 2023/04/23. +// Copyright © 2018, Alibaba Group Holding Limited +// + +#ifndef MNN_OPENCL_BUFFER_CLOSED + +#ifndef LoopBufExecution_hpp +#define LoopBufExecution_hpp + +#include "backend/opencl/execution/image/CommonExecution.hpp" + +namespace MNN { +namespace OpenCL { + +class LoopGatherBufExecution : public CommonExecution { +public: + LoopGatherBufExecution(const LoopParam *loop, const MNN::Op *op, Backend *bn); + virtual ~LoopGatherBufExecution() = default; + virtual ErrorCode onResize(const std::vector &inputs, const std::vector &outputs) override; + +private: + const LoopParam *mLoop; + std::vector mTensors; + std::vector> mTmpTensors; + std::vector> mOffsetTensors; + int mStride_src[4]; + int mStride_dst[4]; + int mStep[2]; + int mIter[2]; + std::set mBuildOptions; +}; + +class LoopBatchMatMulBufExecution : public CommonExecution { +public: + LoopBatchMatMulBufExecution(const LoopParam *loop, const MNN::Op *op, Backend *bn); + virtual ~LoopBatchMatMulBufExecution() = default; + virtual ErrorCode onResize(const std::vector &inputs, const std::vector &outputs) override; + +private: + const LoopParam *mLoop; + std::vector mTensors; + std::vector> mTmpTensors; + std::vector> mOffsetTensors; + int mOffset[4]; + int mStep[4]; + int mIter[4]; + bool mHasBias = false; + bool mTransposeA = false; + bool mTransposeB = false; + std::set mBuildOptions; +}; + +} // namespace OpenCL +} // namespace MNN +#endif /* LoopBufExecution_hpp */ +#endif /* MNN_OPENCL_BUFFER_CLOSED */ diff --git a/source/backend/opencl/execution/buffer/RasterBufExecution.cpp b/source/backend/opencl/execution/buffer/RasterBufExecution.cpp index 66963e39..10bc65df 100644 --- a/source/backend/opencl/execution/buffer/RasterBufExecution.cpp +++ b/source/backend/opencl/execution/buffer/RasterBufExecution.cpp @@ -18,10 +18,8 @@ namespace MNN { namespace OpenCL { RasterBufExecution::RasterBufExecution(const std::vector &inputs, const MNN::Op *op, Backend *backend) - : CommonExecution(backend) { + : CommonExecution(backend, op) { mOpenCLBackend = (OpenCLBackend *)backend; - mOp = op; - mOpType = op->type(); //nothing to do } diff --git a/source/backend/opencl/execution/buffer/ReductionBufExecution.cpp b/source/backend/opencl/execution/buffer/ReductionBufExecution.cpp index 8f3da7e6..684246d8 100644 --- a/source/backend/opencl/execution/buffer/ReductionBufExecution.cpp +++ b/source/backend/opencl/execution/buffer/ReductionBufExecution.cpp @@ -15,7 +15,7 @@ namespace MNN { namespace OpenCL { -ReductionBufExecution::ReductionBufExecution(const MNN::Op* op, Backend* backend) : CommonExecution(backend) { +ReductionBufExecution::ReductionBufExecution(const MNN::Op* op, Backend* backend) : CommonExecution(backend, op) { #ifdef LOG_VERBOSE MNN_PRINT("start ReductionBufExecution init !\n"); #endif @@ -46,7 +46,6 @@ ReductionBufExecution::ReductionBufExecution(const MNN::Op* op, Backend* backend MNN_ASSERT(false); break; } - mOp = op; #ifdef LOG_VERBOSE MNN_PRINT("end ReductionBufExecution init !\n"); #endif @@ -70,20 +69,20 @@ ErrorCode ReductionBufExecution::onResize(const std::vector &inputs, c std::set buildOption; switch (mReductType) { case 0: - buildOption.emplace("-DOPERATE=num+in"); + buildOption.emplace("-DOPERATE(a,b)=(a+b)"); buildOption.emplace("-DGET_AVG"); break; case 1: - buildOption.emplace("-DOPERATE=max(num,in)"); + buildOption.emplace("-DOPERATE(a,b)=max(a,b)"); break; case 2: - buildOption.emplace("-DOPERATE=min(num,in)"); + buildOption.emplace("-DOPERATE(a,b)=min(a,b)"); break; case 3: - buildOption.emplace("-DOPERATE=num*in"); + buildOption.emplace("-DOPERATE(a,b)=(a*b)"); break; case 4: - buildOption.emplace("-DOPERATE=num+in"); + buildOption.emplace("-DOPERATE(a,b)=(a+b)"); break; default: MNN_ASSERT(false); @@ -103,6 +102,7 @@ ErrorCode ReductionBufExecution::onResize(const std::vector &inputs, c mReduct1DKernel.setArg(idx++, static_cast(inputShape[0])); mReduct1DKernel.setArg(idx++, static_cast(inputShape[1])); mReduct1DKernel.setArg(idx++, static_cast(inputShape[2])); + mReduct1DKernel.setArg(idx++, static_cast(inputShape[3])); return NO_ERROR; } diff --git a/source/backend/opencl/execution/buffer/ReluBufExecution.cpp b/source/backend/opencl/execution/buffer/ReluBufExecution.cpp index 1388fc9d..f994e9b0 100644 --- a/source/backend/opencl/execution/buffer/ReluBufExecution.cpp +++ b/source/backend/opencl/execution/buffer/ReluBufExecution.cpp @@ -15,7 +15,7 @@ namespace MNN { namespace OpenCL { ReluBufExecution::ReluBufExecution(const std::vector &inputs, const MNN::Op *op, Backend *backend) - : CommonExecution(backend) { + : CommonExecution(backend, op) { mOpenCLBackend = static_cast(backend); auto mPreluParamPtr = op->main_as_PRelu(); int preluSize = mPreluParamPtr->slopeCount(); @@ -50,9 +50,6 @@ ReluBufExecution::ReluBufExecution(const std::vector &inputs, const MN MNN_ERROR("Map error preluDataPtrCL == nullptr \n"); } mOpenCLBackend->getOpenCLRuntime()->commandQueue().enqueueUnmapMemObject(preluBuffer, preluDataPtrCL); - - mOp = op; - mOpType = op->type(); } ReluBufExecution::~ReluBufExecution() { diff --git a/source/backend/opencl/execution/cl/loop.cl b/source/backend/opencl/execution/cl/loop.cl new file mode 100644 index 00000000..9c1b782a --- /dev/null +++ b/source/backend/opencl/execution/cl/loop.cl @@ -0,0 +1,160 @@ +#ifdef MNN_SUPPORT_FP16 +#pragma OPENCL EXTENSION cl_khr_fp16 : enable +#endif + +__constant sampler_t SAMPLER = CLK_NORMALIZED_COORDS_FALSE | CLK_ADDRESS_CLAMP | CLK_FILTER_NEAREST; + +__kernel void batch_matmul(__private int global_dim0, __private int global_dim1, __private int global_dim2, + __global FLOAT* output, __global FLOAT* input_A, __global FLOAT* input_B, +#ifdef BIAS + __global FLOAT* input_C, +#endif + __global FLOAT* offset_O, __global FLOAT* offset_A, __global FLOAT* offset_B, +#ifdef BIAS + __global FLOAT* offset_C, +#endif + __private const int e, + __private const int l, + __private const int h, + __private const int4 offsets, + __private const int4 iters, + __private const int4 steps) { + int3 pos = (int3)(get_global_id(0), get_global_id(1), get_global_id(2)); + + if (pos.x < global_dim0 && pos.y < global_dim1 && pos.z < global_dim2) { + int4 index = (int4)(pos.z); + if (iters.x >= 0) { + index.x = (int)(offset_O[pos.z]); + } + if (iters.y >= 0) { + index.y = (int)(offset_A[pos.z]); + } + if (iters.z >= 0) { + index.z = (int)(offset_B[pos.z]); + } +#ifdef BIAS + if (iters.w >= 0) { + index.w = (int)(offset_C[pos.z]); + } +#endif + int4 offset = index * steps + offsets; + +#if TRANSPOSE_A + __global FLOAT* A_ptr = input_A + offset.y + pos.y; +#else + __global FLOAT* A_ptr = input_A + offset.y + pos.y * l; +#endif + +#if TRANSPOSE_B + __global FLOAT* B_ptr = input_B + offset.z + pos.x * l; +#else + __global FLOAT* B_ptr = input_B + offset.z + pos.x; +#endif + +#ifdef BIAS + FLOAT value = input_C[offset.w + pos.x]; +#else + FLOAT value = 0; +#endif + + for(int i = 0; i < l; ++i){ +#if TRANSPOSE_A + FLOAT value_a = A_ptr[i * e]; +#else + FLOAT value_a = A_ptr[i]; +#endif + +#if TRANSPOSE_B + FLOAT value_b = B_ptr[i]; +#else + FLOAT value_b = B_ptr[i * h]; +#endif + + value = mad(value_a, value_b, value); + } + + output[offset.x + pos.y * h + pos.x] = value; + } +} + +__kernel void tile(__private int global_dim0, __private int global_dim1, __private int global_dim2, + __read_only image2d_t input, + __global FLOAT* output, + __private const int width, + __private const int height, + __private const int channel){ + int3 pos = (int3)(get_global_id(0), get_global_id(1), get_global_id(2)); + if (pos.x < global_dim0 && pos.y < global_dim1 && pos.z < global_dim2) { + const int w = pos.x % width; + const int h = pos.x / width; + const int c = pos.y << 2; + + const int x_dst_pitch = 1; + const int y_dst_pitch = x_dst_pitch * width; + const int c_dst_pitch = y_dst_pitch * height; + const int b_dst_pitch = c_dst_pitch * channel; + __global FLOAT* dst_ptr = output + pos.z * b_dst_pitch + c * c_dst_pitch + h * y_dst_pitch + w * x_dst_pitch; + + FLOAT4 value = RI_F(input, SAMPLER, (int2)(pos.y * width + w, pos.z * height + h)); + dst_ptr[0] = value.x; + if(c + 1 >= channel)return; + dst_ptr[c_dst_pitch] = value.y; + if(c + 2 >= channel)return; + dst_ptr[2 * c_dst_pitch] = value.z; + if(c + 3 >= channel)return; + dst_ptr[3 * c_dst_pitch] = value.w; + } +} + +__kernel void pack(__private int global_dim0, __private int global_dim1, __private int global_dim2, + __global FLOAT* input, + __write_only image2d_t output, + __private const int width, + __private const int height, + __private const int channel){ + int3 pos = (int3)(get_global_id(0), get_global_id(1), get_global_id(2)); + if (pos.x < global_dim0 && pos.y < global_dim1 && pos.z < global_dim2) { + const int w = pos.x % width; + const int h = pos.x / width; + const int c = pos.y << 2; + + const int x_src_pitch = 1; + const int y_src_pitch = x_src_pitch * width; + const int c_src_pitch = y_src_pitch * height; + const int b_src_pitch = c_src_pitch * channel; + __global FLOAT* src_ptr = input + pos.z * b_src_pitch + c * c_src_pitch + h * y_src_pitch + w * x_src_pitch; + FLOAT4 value = (FLOAT4)0; + FLOAT *value_ptr = (FLOAT*)&value; + for(int i = 0; i < 4 && (i + c < channel); ++i){ + value_ptr[i] = src_ptr[i * c_src_pitch]; + } + WI_F(output, (int2)(pos.y * width + w, pos.z * height + h), value); + } +} + +__kernel void batch_gather(__private int global_dim0, __private int global_dim1, __private int global_dim2, + __global FLOAT* output, __global FLOAT* input, + __global FLOAT* offset_dst, __global FLOAT* offset_src, + __private const int x_size, + __private const int4 stride_src, + __private const int4 stride_dst, + __private const int2 steps, + __private const int2 iters) { + int3 pos = (int3)(get_global_id(0), get_global_id(1), get_global_id(2)); + + if (pos.x < global_dim0 && pos.y < global_dim1 && pos.z < global_dim2) { + + int x = pos.x % x_size; + int y = pos.x / x_size; + + int2 index = (int2)(pos.z, pos.z); + if (iters.x >= 0) { + index.x = (int)(offset_dst[pos.z]); + } + if (iters.y >= 0) { + index.y = (int)(offset_src[pos.z]); + } + int2 offset = index * steps; + output[offset.x + stride_dst.w + x * stride_dst.x + y * stride_dst.y + pos.y * stride_dst.z] = input[offset.y + stride_src.w + x * stride_src.x + y * stride_src.y + pos.y * stride_src.z]; + } +} \ No newline at end of file diff --git a/source/backend/opencl/execution/cl/loop_buf.cl b/source/backend/opencl/execution/cl/loop_buf.cl new file mode 100644 index 00000000..a3217996 --- /dev/null +++ b/source/backend/opencl/execution/cl/loop_buf.cl @@ -0,0 +1,164 @@ +#ifdef MNN_SUPPORT_FP16 +#pragma OPENCL EXTENSION cl_khr_fp16 : enable +#endif + +__kernel void batch_matmul_buf(__private int global_dim0, __private int global_dim1, __private int global_dim2, + __global FLOAT* output, __global FLOAT* input_A, __global FLOAT* input_B, +#ifdef BIAS + __global FLOAT* input_C, +#endif + __global FLOAT* offset_O, __global FLOAT* offset_A, __global FLOAT* offset_B, +#ifdef BIAS + __global FLOAT* offset_C, +#endif + __private const int e, + __private const int l, + __private const int h, + __private const int4 offsets, + __private const int4 iters, + __private const int4 steps) { + int3 pos = (int3)(get_global_id(0), get_global_id(1), get_global_id(2)); + + if (pos.x < global_dim0 && pos.y < global_dim1 && pos.z < global_dim2) { + int4 index = (int4)(pos.z); + if (iters.x >= 0) { + index.x = (int)(offset_O[pos.z]); + } + if (iters.y >= 0) { + index.y = (int)(offset_A[pos.z]); + } + if (iters.z >= 0) { + index.z = (int)(offset_B[pos.z]); + } +#ifdef BIAS + if (iters.w >= 0) { + index.w = (int)(offset_C[pos.z]); + } +#endif + int4 offset = index * steps + offsets; + +#if TRANSPOSE_A + __global FLOAT* A_ptr = input_A + offset.y + pos.y; +#else + __global FLOAT* A_ptr = input_A + offset.y + pos.y * l; +#endif + +#if TRANSPOSE_B + __global FLOAT* B_ptr = input_B + offset.z + pos.x * l; +#else + __global FLOAT* B_ptr = input_B + offset.z + pos.x; +#endif + +#ifdef BIAS + FLOAT value = input_C[offset.w + pos.x]; +#else + FLOAT value = 0; +#endif + + for(int i = 0; i < l; ++i){ +#if TRANSPOSE_A + FLOAT value_a = A_ptr[i * e]; +#else + FLOAT value_a = A_ptr[i]; +#endif + +#if TRANSPOSE_B + FLOAT value_b = B_ptr[i]; +#else + FLOAT value_b = B_ptr[i * h]; +#endif + + value = mad(value_a, value_b, value); + } + + output[offset.x + pos.y * h + pos.x] = value; + } +} + +__kernel void tile_buf(__private int global_dim0, __private int global_dim1, __private int global_dim2, + __global FLOAT* input, __global FLOAT* output, + __private const int width, + __private const int height, + __private const int channel){ + int3 pos = (int3)(get_global_id(0), get_global_id(1), get_global_id(2)); + if (pos.x < global_dim0 && pos.y < global_dim1 && pos.z < global_dim2) { + const int w = pos.x % width; + const int h = pos.x / width; + const int c = pos.y << 2; + const int x_src_pitch = 4; + const int y_src_pitch = x_src_pitch * width; + const int c_src_pitch = y_src_pitch * height; + const int b_src_pitch = c_src_pitch * ((channel + 3) / 4); + + const int x_dst_pitch = 1; + const int y_dst_pitch = x_dst_pitch * width; + const int c_dst_pitch = y_dst_pitch * height; + const int b_dst_pitch = c_dst_pitch * channel; + __global FLOAT* dst_ptr = output + pos.z * b_dst_pitch + c * c_dst_pitch + h * y_dst_pitch + w * x_dst_pitch; + + FLOAT4 value = vload4(0, input + pos.z * b_src_pitch + pos.y * c_src_pitch + h * y_src_pitch + w * x_src_pitch); + dst_ptr[0] = value.x; + if(c + 1 >= channel)return; + dst_ptr[c_dst_pitch] = value.y; + if(c + 2 >= channel)return; + dst_ptr[2 * c_dst_pitch] = value.z; + if(c + 3 >= channel)return; + dst_ptr[3 * c_dst_pitch] = value.w; + } +} + +__kernel void pack_buf(__private int global_dim0, __private int global_dim1, __private int global_dim2, + __global FLOAT* input, __global FLOAT* output, + __private const int width, + __private const int height, + __private const int channel){ + int3 pos = (int3)(get_global_id(0), get_global_id(1), get_global_id(2)); + if (pos.x < global_dim0 && pos.y < global_dim1 && pos.z < global_dim2) { + const int w = pos.x % width; + const int h = pos.x / width; + const int c = pos.y << 2; + const int x_dst_pitch = 4; + const int y_dst_pitch = x_dst_pitch * width; + const int c_dst_pitch = y_dst_pitch * height; + const int b_dst_pitch = c_dst_pitch * ((channel + 3) / 4); + + const int x_src_pitch = 1; + const int y_src_pitch = x_src_pitch * width; + const int c_src_pitch = y_src_pitch * height; + const int b_src_pitch = c_src_pitch * channel; + __global FLOAT* src_ptr = input + pos.z * b_src_pitch + c * c_src_pitch + h * y_src_pitch + w * x_src_pitch; + FLOAT4 value = (FLOAT4)0; + FLOAT *value_ptr = (FLOAT*)&value; + for(int i = 0; i < 4 && (i + c < channel); ++i){ + value_ptr[i] = src_ptr[i * c_src_pitch]; + } + vstore4(value, 0, output + pos.z * b_dst_pitch + pos.y * c_dst_pitch + h * y_dst_pitch + w * x_dst_pitch); + } +} + +__kernel void batch_gather_buf(__private int global_dim0, __private int global_dim1, __private int global_dim2, + __global FLOAT* output, __global FLOAT* input, + __global FLOAT* offset_dst, __global FLOAT* offset_src, + __private const int x_size, + __private const int4 stride_src, + __private const int4 stride_dst, + __private const int2 steps, + __private const int2 iters) { + int3 pos = (int3)(get_global_id(0), get_global_id(1), get_global_id(2)); + + if (pos.x < global_dim0 && pos.y < global_dim1 && pos.z < global_dim2) { + + int x = pos.x % x_size; + int y = pos.x / x_size; + + int2 index = (int2)(pos.z, pos.z); + if (iters.x >= 0) { + index.x = (int)(offset_dst[pos.z]); + } + if (iters.y >= 0) { + index.y = (int)(offset_src[pos.z]); + } + int2 offset = index * steps; + output[offset.x + stride_dst.w + x * stride_dst.x + y * stride_dst.y + pos.y * stride_dst.z] = input[offset.y + stride_src.w + x * stride_src.x + y * stride_src.y + pos.y * stride_src.z]; + } +} diff --git a/source/backend/opencl/execution/cl/opencl_program.cc b/source/backend/opencl/execution/cl/opencl_program.cc index 7dd00e1f..8d2e3d26 100644 --- a/source/backend/opencl/execution/cl/opencl_program.cc +++ b/source/backend/opencl/execution/cl/opencl_program.cc @@ -100,6 +100,16 @@ extern const std::map> OpenCLProgramMap { 0x23,0x69,0x66,0x64,0x65,0x66,0x20,0x4d,0x4e,0x4e,0x5f,0x53,0x55,0x50,0x50,0x4f,0x52,0x54,0x5f,0x46,0x50,0x31,0x36,0xa,0x23,0x70,0x72,0x61,0x67,0x6d,0x61,0x20,0x4f,0x50,0x45,0x4e,0x43,0x4c,0x20,0x45,0x58,0x54,0x45,0x4e,0x53,0x49,0x4f,0x4e,0x20,0x63,0x6c,0x5f,0x6b,0x68,0x72,0x5f,0x66,0x70,0x31,0x36,0x20,0x3a,0x20,0x65,0x6e,0x61,0x62,0x6c,0x65,0xa,0x23,0x65,0x6e,0x64,0x69,0x66,0xa,0xa,0x23,0x64,0x65,0x66,0x69,0x6e,0x65,0x20,0x47,0x4c,0x4f,0x42,0x41,0x4c,0x5f,0x53,0x49,0x5a,0x45,0x5f,0x33,0x5f,0x44,0x49,0x4d,0x53,0x20,0x5c,0xa,0x20,0x20,0x20,0x20,0x5f,0x5f,0x70,0x72,0x69,0x76,0x61,0x74,0x65,0x20,0x63,0x6f,0x6e,0x73,0x74,0x20,0x69,0x6e,0x74,0x20,0x67,0x6c,0x6f,0x62,0x61,0x6c,0x5f,0x73,0x69,0x7a,0x65,0x5f,0x64,0x69,0x6d,0x30,0x2c,0x20,0x5f,0x5f,0x70,0x72,0x69,0x76,0x61,0x74,0x65,0x20,0x63,0x6f,0x6e,0x73,0x74,0x20,0x69,0x6e,0x74,0x20,0x67,0x6c,0x6f,0x62,0x61,0x6c,0x5f,0x73,0x69,0x7a,0x65,0x5f,0x64,0x69,0x6d,0x31,0x2c,0x20,0x5f,0x5f,0x70,0x72,0x69,0x76,0x61,0x74,0x65,0x20,0x63,0x6f,0x6e,0x73,0x74,0x20,0x69,0x6e,0x74,0x20,0x67,0x6c,0x6f,0x62,0x61,0x6c,0x5f,0x73,0x69,0x7a,0x65,0x5f,0x64,0x69,0x6d,0x32,0x2c,0xa,0xa,0x23,0x64,0x65,0x66,0x69,0x6e,0x65,0x20,0x44,0x45,0x41,0x4c,0x5f,0x4e,0x4f,0x4e,0x5f,0x55,0x4e,0x49,0x46,0x4f,0x52,0x4d,0x5f,0x44,0x49,0x4d,0x33,0x28,0x69,0x6e,0x70,0x75,0x74,0x31,0x2c,0x20,0x69,0x6e,0x70,0x75,0x74,0x32,0x2c,0x20,0x69,0x6e,0x70,0x75,0x74,0x33,0x29,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x5c,0xa,0x20,0x20,0x20,0x20,0x69,0x66,0x20,0x28,0x69,0x6e,0x70,0x75,0x74,0x31,0x20,0x3e,0x3d,0x20,0x67,0x6c,0x6f,0x62,0x61,0x6c,0x5f,0x73,0x69,0x7a,0x65,0x5f,0x64,0x69,0x6d,0x30,0x20,0x7c,0x7c,0x20,0x69,0x6e,0x70,0x75,0x74,0x32,0x20,0x3e,0x3d,0x20,0x67,0x6c,0x6f,0x62,0x61,0x6c,0x5f,0x73,0x69,0x7a,0x65,0x5f,0x64,0x69,0x6d,0x31,0x20,0x7c,0x7c,0x20,0x69,0x6e,0x70,0x75,0x74,0x33,0x20,0x3e,0x3d,0x20,0x67,0x6c,0x6f,0x62,0x61,0x6c,0x5f,0x73,0x69,0x7a,0x65,0x5f,0x64,0x69,0x6d,0x32,0x29,0x20,0x7b,0x20,0x5c,0xa,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x72,0x65,0x74,0x75,0x72,0x6e,0x3b,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x5c,0xa,0x20,0x20,0x20,0x20,0x7d,0xa,0xa,0x5f,0x5f,0x6b,0x65,0x72,0x6e,0x65,0x6c,0x20,0x76,0x6f,0x69,0x64,0x20,0x6e,0x65,0x61,0x72,0x65,0x73,0x74,0x5f,0x62,0x75,0x66,0x28,0x47,0x4c,0x4f,0x42,0x41,0x4c,0x5f,0x53,0x49,0x5a,0x45,0x5f,0x33,0x5f,0x44,0x49,0x4d,0x53,0x20,0x5f,0x5f,0x67,0x6c,0x6f,0x62,0x61,0x6c,0x20,0x63,0x6f,0x6e,0x73,0x74,0x20,0x46,0x4c,0x4f,0x41,0x54,0x2a,0x20,0x69,0x6e,0x70,0x75,0x74,0x2c,0xa,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x5f,0x5f,0x67,0x6c,0x6f,0x62,0x61,0x6c,0x20,0x46,0x4c,0x4f,0x41,0x54,0x2a,0x20,0x6f,0x75,0x74,0x70,0x75,0x74,0x2c,0xa,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x5f,0x5f,0x70,0x72,0x69,0x76,0x61,0x74,0x65,0x20,0x63,0x6f,0x6e,0x73,0x74,0x20,0x66,0x6c,0x6f,0x61,0x74,0x20,0x68,0x65,0x69,0x67,0x68,0x74,0x5f,0x73,0x63,0x61,0x6c,0x65,0x2c,0xa,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x5f,0x5f,0x70,0x72,0x69,0x76,0x61,0x74,0x65,0x20,0x63,0x6f,0x6e,0x73,0x74,0x20,0x66,0x6c,0x6f,0x61,0x74,0x20,0x77,0x69,0x64,0x74,0x68,0x5f,0x73,0x63,0x61,0x6c,0x65,0x2c,0xa,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x5f,0x5f,0x70,0x72,0x69,0x76,0x61,0x74,0x65,0x20,0x63,0x6f,0x6e,0x73,0x74,0x20,0x66,0x6c,0x6f,0x61,0x74,0x20,0x68,0x65,0x69,0x67,0x68,0x74,0x5f,0x6f,0x66,0x66,0x73,0x65,0x74,0x2c,0xa,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x5f,0x5f,0x70,0x72,0x69,0x76,0x61,0x74,0x65,0x20,0x63,0x6f,0x6e,0x73,0x74,0x20,0x66,0x6c,0x6f,0x61,0x74,0x20,0x77,0x69,0x64,0x74,0x68,0x5f,0x6f,0x66,0x66,0x73,0x65,0x74,0x2c,0xa,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x5f,0x5f,0x70,0x72,0x69,0x76,0x61,0x74,0x65,0x20,0x63,0x6f,0x6e,0x73,0x74,0x20,0x69,0x6e,0x74,0x20,0x69,0x6e,0x70,0x75,0x74,0x5f,0x68,0x65,0x69,0x67,0x68,0x74,0x2c,0xa,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x5f,0x5f,0x70,0x72,0x69,0x76,0x61,0x74,0x65,0x20,0x63,0x6f,0x6e,0x73,0x74,0x20,0x69,0x6e,0x74,0x20,0x69,0x6e,0x70,0x75,0x74,0x5f,0x77,0x69,0x64,0x74,0x68,0x2c,0xa,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x5f,0x5f,0x70,0x72,0x69,0x76,0x61,0x74,0x65,0x20,0x63,0x6f,0x6e,0x73,0x74,0x20,0x69,0x6e,0x74,0x20,0x6f,0x75,0x74,0x5f,0x68,0x65,0x69,0x67,0x68,0x74,0x2c,0xa,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x5f,0x5f,0x70,0x72,0x69,0x76,0x61,0x74,0x65,0x20,0x63,0x6f,0x6e,0x73,0x74,0x20,0x69,0x6e,0x74,0x20,0x6f,0x75,0x74,0x5f,0x77,0x69,0x64,0x74,0x68,0x2c,0xa,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x5f,0x5f,0x70,0x72,0x69,0x76,0x61,0x74,0x65,0x20,0x63,0x6f,0x6e,0x73,0x74,0x20,0x69,0x6e,0x74,0x20,0x63,0x68,0x61,0x6e,0x6e,0x65,0x6c,0x42,0x6c,0x6f,0x63,0x6b,0x73,0x29,0x20,0x7b,0xa,0x20,0x20,0x20,0x20,0x63,0x6f,0x6e,0x73,0x74,0x20,0x69,0x6e,0x74,0x20,0x6f,0x75,0x74,0x70,0x75,0x74,0x5f,0x63,0x68,0x61,0x6e,0x6e,0x65,0x6c,0x5f,0x62,0x6c,0x6f,0x63,0x6b,0x5f,0x69,0x64,0x78,0x20,0x20,0x20,0x20,0x20,0x20,0x3d,0x20,0x67,0x65,0x74,0x5f,0x67,0x6c,0x6f,0x62,0x61,0x6c,0x5f,0x69,0x64,0x28,0x30,0x29,0x3b,0xa,0x20,0x20,0x20,0x20,0x63,0x6f,0x6e,0x73,0x74,0x20,0x69,0x6e,0x74,0x20,0x6f,0x75,0x74,0x70,0x75,0x74,0x5f,0x77,0x69,0x64,0x74,0x68,0x5f,0x62,0x6c,0x6f,0x63,0x6b,0x5f,0x69,0x64,0x78,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x3d,0x20,0x67,0x65,0x74,0x5f,0x67,0x6c,0x6f,0x62,0x61,0x6c,0x5f,0x69,0x64,0x28,0x31,0x29,0x3b,0xa,0x20,0x20,0x20,0x20,0x63,0x6f,0x6e,0x73,0x74,0x20,0x69,0x6e,0x74,0x20,0x6f,0x75,0x74,0x70,0x75,0x74,0x5f,0x62,0x61,0x74,0x63,0x68,0x5f,0x68,0x65,0x69,0x67,0x68,0x74,0x5f,0x62,0x6c,0x6f,0x63,0x6b,0x5f,0x69,0x64,0x78,0x20,0x3d,0x20,0x67,0x65,0x74,0x5f,0x67,0x6c,0x6f,0x62,0x61,0x6c,0x5f,0x69,0x64,0x28,0x32,0x29,0x3b,0xa,0xa,0x20,0x20,0x20,0x20,0x44,0x45,0x41,0x4c,0x5f,0x4e,0x4f,0x4e,0x5f,0x55,0x4e,0x49,0x46,0x4f,0x52,0x4d,0x5f,0x44,0x49,0x4d,0x33,0x28,0x6f,0x75,0x74,0x70,0x75,0x74,0x5f,0x63,0x68,0x61,0x6e,0x6e,0x65,0x6c,0x5f,0x62,0x6c,0x6f,0x63,0x6b,0x5f,0x69,0x64,0x78,0x2c,0x20,0x6f,0x75,0x74,0x70,0x75,0x74,0x5f,0x77,0x69,0x64,0x74,0x68,0x5f,0x62,0x6c,0x6f,0x63,0x6b,0x5f,0x69,0x64,0x78,0x2c,0x20,0x6f,0x75,0x74,0x70,0x75,0x74,0x5f,0x62,0x61,0x74,0x63,0x68,0x5f,0x68,0x65,0x69,0x67,0x68,0x74,0x5f,0x62,0x6c,0x6f,0x63,0x6b,0x5f,0x69,0x64,0x78,0x29,0x3b,0xa,0xa,0x20,0x20,0x20,0x20,0x63,0x6f,0x6e,0x73,0x74,0x20,0x69,0x6e,0x74,0x20,0x6f,0x75,0x74,0x70,0x75,0x74,0x5f,0x62,0x61,0x74,0x63,0x68,0x5f,0x69,0x64,0x78,0x20,0x20,0x3d,0x20,0x6f,0x75,0x74,0x70,0x75,0x74,0x5f,0x62,0x61,0x74,0x63,0x68,0x5f,0x68,0x65,0x69,0x67,0x68,0x74,0x5f,0x62,0x6c,0x6f,0x63,0x6b,0x5f,0x69,0x64,0x78,0x20,0x2f,0x20,0x6f,0x75,0x74,0x5f,0x68,0x65,0x69,0x67,0x68,0x74,0x3b,0xa,0x20,0x20,0x20,0x20,0x63,0x6f,0x6e,0x73,0x74,0x20,0x69,0x6e,0x74,0x20,0x6f,0x75,0x74,0x70,0x75,0x74,0x5f,0x68,0x65,0x69,0x67,0x68,0x74,0x5f,0x69,0x64,0x78,0x20,0x3d,0x20,0x6f,0x75,0x74,0x70,0x75,0x74,0x5f,0x62,0x61,0x74,0x63,0x68,0x5f,0x68,0x65,0x69,0x67,0x68,0x74,0x5f,0x62,0x6c,0x6f,0x63,0x6b,0x5f,0x69,0x64,0x78,0x20,0x25,0x20,0x6f,0x75,0x74,0x5f,0x68,0x65,0x69,0x67,0x68,0x74,0x3b,0xa,0xa,0x20,0x20,0x20,0x20,0x63,0x6f,0x6e,0x73,0x74,0x20,0x66,0x6c,0x6f,0x61,0x74,0x20,0x69,0x6e,0x5f,0x68,0x5f,0x69,0x64,0x78,0x20,0x3d,0x20,0x6f,0x75,0x74,0x70,0x75,0x74,0x5f,0x68,0x65,0x69,0x67,0x68,0x74,0x5f,0x69,0x64,0x78,0x20,0x2a,0x20,0x68,0x65,0x69,0x67,0x68,0x74,0x5f,0x73,0x63,0x61,0x6c,0x65,0x20,0x2b,0x20,0x68,0x65,0x69,0x67,0x68,0x74,0x5f,0x6f,0x66,0x66,0x73,0x65,0x74,0x3b,0xa,0x20,0x20,0x20,0x20,0x63,0x6f,0x6e,0x73,0x74,0x20,0x66,0x6c,0x6f,0x61,0x74,0x20,0x69,0x6e,0x5f,0x77,0x5f,0x69,0x64,0x78,0x20,0x3d,0x20,0x6f,0x75,0x74,0x70,0x75,0x74,0x5f,0x77,0x69,0x64,0x74,0x68,0x5f,0x62,0x6c,0x6f,0x63,0x6b,0x5f,0x69,0x64,0x78,0x20,0x2a,0x20,0x77,0x69,0x64,0x74,0x68,0x5f,0x73,0x63,0x61,0x6c,0x65,0x20,0x2b,0x20,0x77,0x69,0x64,0x74,0x68,0x5f,0x6f,0x66,0x66,0x73,0x65,0x74,0x3b,0xa,0x20,0x20,0x20,0x20,0x63,0x6f,0x6e,0x73,0x74,0x20,0x69,0x6e,0x74,0x20,0x69,0x6e,0x5f,0x68,0x5f,0x69,0x6e,0x64,0x65,0x78,0x20,0x20,0x20,0x20,0x20,0x20,0x3d,0x20,0x6d,0x69,0x6e,0x28,0x6d,0x61,0x78,0x28,0x30,0x2c,0x20,0x28,0x69,0x6e,0x74,0x29,0x66,0x6c,0x6f,0x6f,0x72,0x28,0x69,0x6e,0x5f,0x68,0x5f,0x69,0x64,0x78,0x29,0x29,0x2c,0x20,0x69,0x6e,0x70,0x75,0x74,0x5f,0x68,0x65,0x69,0x67,0x68,0x74,0x2d,0x31,0x29,0x3b,0xa,0x20,0x20,0x20,0x20,0x63,0x6f,0x6e,0x73,0x74,0x20,0x69,0x6e,0x74,0x20,0x69,0x6e,0x5f,0x77,0x5f,0x69,0x6e,0x64,0x65,0x78,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x3d,0x20,0x6d,0x69,0x6e,0x28,0x6d,0x61,0x78,0x28,0x30,0x2c,0x20,0x28,0x69,0x6e,0x74,0x29,0x66,0x6c,0x6f,0x6f,0x72,0x28,0x69,0x6e,0x5f,0x77,0x5f,0x69,0x64,0x78,0x29,0x29,0x2c,0x20,0x69,0x6e,0x70,0x75,0x74,0x5f,0x77,0x69,0x64,0x74,0x68,0x2d,0x31,0x29,0x3b,0xa,0xa,0x20,0x20,0x20,0x20,0x63,0x6f,0x6e,0x73,0x74,0x20,0x69,0x6e,0x74,0x20,0x69,0x6e,0x70,0x5f,0x6f,0x66,0x66,0x73,0x65,0x74,0x20,0x3d,0x20,0x28,0x28,0x6f,0x75,0x74,0x70,0x75,0x74,0x5f,0x62,0x61,0x74,0x63,0x68,0x5f,0x69,0x64,0x78,0x20,0x2a,0x20,0x63,0x68,0x61,0x6e,0x6e,0x65,0x6c,0x42,0x6c,0x6f,0x63,0x6b,0x73,0x20,0x2b,0x20,0x6f,0x75,0x74,0x70,0x75,0x74,0x5f,0x63,0x68,0x61,0x6e,0x6e,0x65,0x6c,0x5f,0x62,0x6c,0x6f,0x63,0x6b,0x5f,0x69,0x64,0x78,0x29,0x20,0x2a,0x20,0x69,0x6e,0x70,0x75,0x74,0x5f,0x68,0x65,0x69,0x67,0x68,0x74,0x20,0x2b,0x20,0x69,0x6e,0x5f,0x68,0x5f,0x69,0x6e,0x64,0x65,0x78,0x29,0x20,0x2a,0x20,0x69,0x6e,0x70,0x75,0x74,0x5f,0x77,0x69,0x64,0x74,0x68,0x20,0x2b,0x20,0x69,0x6e,0x5f,0x77,0x5f,0x69,0x6e,0x64,0x65,0x78,0x3b,0xa,0x20,0x20,0x20,0x20,0x46,0x4c,0x4f,0x41,0x54,0x34,0x20,0x76,0x61,0x6c,0x75,0x65,0x20,0x3d,0x20,0x76,0x6c,0x6f,0x61,0x64,0x34,0x28,0x69,0x6e,0x70,0x5f,0x6f,0x66,0x66,0x73,0x65,0x74,0x2c,0x20,0x69,0x6e,0x70,0x75,0x74,0x29,0x3b,0xa,0xa,0x20,0x20,0x20,0x20,0x63,0x6f,0x6e,0x73,0x74,0x20,0x69,0x6e,0x74,0x20,0x6f,0x75,0x74,0x5f,0x6f,0x66,0x66,0x73,0x65,0x74,0x20,0x3d,0x20,0x28,0x28,0x6f,0x75,0x74,0x70,0x75,0x74,0x5f,0x62,0x61,0x74,0x63,0x68,0x5f,0x69,0x64,0x78,0x20,0x2a,0x20,0x63,0x68,0x61,0x6e,0x6e,0x65,0x6c,0x42,0x6c,0x6f,0x63,0x6b,0x73,0x20,0x2b,0x20,0x6f,0x75,0x74,0x70,0x75,0x74,0x5f,0x63,0x68,0x61,0x6e,0x6e,0x65,0x6c,0x5f,0x62,0x6c,0x6f,0x63,0x6b,0x5f,0x69,0x64,0x78,0x29,0x20,0x2a,0x20,0x6f,0x75,0x74,0x5f,0x68,0x65,0x69,0x67,0x68,0x74,0x20,0x2b,0x20,0x6f,0x75,0x74,0x70,0x75,0x74,0x5f,0x68,0x65,0x69,0x67,0x68,0x74,0x5f,0x69,0x64,0x78,0x29,0x20,0x2a,0x20,0x6f,0x75,0x74,0x5f,0x77,0x69,0x64,0x74,0x68,0x20,0x2b,0x20,0x6f,0x75,0x74,0x70,0x75,0x74,0x5f,0x77,0x69,0x64,0x74,0x68,0x5f,0x62,0x6c,0x6f,0x63,0x6b,0x5f,0x69,0x64,0x78,0x3b,0xa,0x20,0x20,0x20,0x20,0x76,0x73,0x74,0x6f,0x72,0x65,0x34,0x28,0x76,0x61,0x6c,0x75,0x65,0x2c,0x20,0x6f,0x75,0x74,0x5f,0x6f,0x66,0x66,0x73,0x65,0x74,0x2c,0x20,0x6f,0x75,0x74,0x70,0x75,0x74,0x29,0x3b,0xa,0x7d,0xa,0xa,0x5f,0x5f,0x6b,0x65,0x72,0x6e,0x65,0x6c,0x20,0x76,0x6f,0x69,0x64,0x20,0x62,0x69,0x6c,0x69,0x6e,0x65,0x61,0x72,0x5f,0x62,0x75,0x66,0x28,0x47,0x4c,0x4f,0x42,0x41,0x4c,0x5f,0x53,0x49,0x5a,0x45,0x5f,0x33,0x5f,0x44,0x49,0x4d,0x53,0x20,0x5f,0x5f,0x67,0x6c,0x6f,0x62,0x61,0x6c,0x20,0x63,0x6f,0x6e,0x73,0x74,0x20,0x46,0x4c,0x4f,0x41,0x54,0x2a,0x20,0x69,0x6e,0x70,0x75,0x74,0x2c,0xa,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x5f,0x5f,0x67,0x6c,0x6f,0x62,0x61,0x6c,0x20,0x46,0x4c,0x4f,0x41,0x54,0x2a,0x20,0x6f,0x75,0x74,0x70,0x75,0x74,0x2c,0xa,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x5f,0x5f,0x70,0x72,0x69,0x76,0x61,0x74,0x65,0x20,0x63,0x6f,0x6e,0x73,0x74,0x20,0x66,0x6c,0x6f,0x61,0x74,0x20,0x68,0x65,0x69,0x67,0x68,0x74,0x5f,0x73,0x63,0x61,0x6c,0x65,0x2c,0xa,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x5f,0x5f,0x70,0x72,0x69,0x76,0x61,0x74,0x65,0x20,0x63,0x6f,0x6e,0x73,0x74,0x20,0x66,0x6c,0x6f,0x61,0x74,0x20,0x77,0x69,0x64,0x74,0x68,0x5f,0x73,0x63,0x61,0x6c,0x65,0x2c,0xa,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x5f,0x5f,0x70,0x72,0x69,0x76,0x61,0x74,0x65,0x20,0x63,0x6f,0x6e,0x73,0x74,0x20,0x66,0x6c,0x6f,0x61,0x74,0x20,0x68,0x65,0x69,0x67,0x68,0x74,0x5f,0x6f,0x66,0x66,0x73,0x65,0x74,0x2c,0xa,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x5f,0x5f,0x70,0x72,0x69,0x76,0x61,0x74,0x65,0x20,0x63,0x6f,0x6e,0x73,0x74,0x20,0x66,0x6c,0x6f,0x61,0x74,0x20,0x77,0x69,0x64,0x74,0x68,0x5f,0x6f,0x66,0x66,0x73,0x65,0x74,0x2c,0xa,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x5f,0x5f,0x70,0x72,0x69,0x76,0x61,0x74,0x65,0x20,0x63,0x6f,0x6e,0x73,0x74,0x20,0x69,0x6e,0x74,0x20,0x69,0x6e,0x70,0x75,0x74,0x5f,0x68,0x65,0x69,0x67,0x68,0x74,0x2c,0xa,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x5f,0x5f,0x70,0x72,0x69,0x76,0x61,0x74,0x65,0x20,0x63,0x6f,0x6e,0x73,0x74,0x20,0x69,0x6e,0x74,0x20,0x69,0x6e,0x70,0x75,0x74,0x5f,0x77,0x69,0x64,0x74,0x68,0x2c,0xa,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x5f,0x5f,0x70,0x72,0x69,0x76,0x61,0x74,0x65,0x20,0x63,0x6f,0x6e,0x73,0x74,0x20,0x69,0x6e,0x74,0x20,0x6f,0x75,0x74,0x5f,0x68,0x65,0x69,0x67,0x68,0x74,0x2c,0xa,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x5f,0x5f,0x70,0x72,0x69,0x76,0x61,0x74,0x65,0x20,0x63,0x6f,0x6e,0x73,0x74,0x20,0x69,0x6e,0x74,0x20,0x6f,0x75,0x74,0x5f,0x77,0x69,0x64,0x74,0x68,0x2c,0xa,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x5f,0x5f,0x70,0x72,0x69,0x76,0x61,0x74,0x65,0x20,0x63,0x6f,0x6e,0x73,0x74,0x20,0x69,0x6e,0x74,0x20,0x63,0x68,0x61,0x6e,0x6e,0x65,0x6c,0x42,0x6c,0x6f,0x63,0x6b,0x73,0x29,0x20,0x7b,0xa,0x20,0x20,0x20,0x20,0x63,0x6f,0x6e,0x73,0x74,0x20,0x69,0x6e,0x74,0x20,0x6f,0x75,0x74,0x70,0x75,0x74,0x5f,0x63,0x68,0x61,0x6e,0x6e,0x65,0x6c,0x5f,0x62,0x6c,0x6f,0x63,0x6b,0x5f,0x69,0x64,0x78,0x20,0x20,0x20,0x20,0x20,0x20,0x3d,0x20,0x67,0x65,0x74,0x5f,0x67,0x6c,0x6f,0x62,0x61,0x6c,0x5f,0x69,0x64,0x28,0x30,0x29,0x3b,0xa,0x20,0x20,0x20,0x20,0x63,0x6f,0x6e,0x73,0x74,0x20,0x69,0x6e,0x74,0x20,0x6f,0x75,0x74,0x70,0x75,0x74,0x5f,0x77,0x69,0x64,0x74,0x68,0x5f,0x62,0x6c,0x6f,0x63,0x6b,0x5f,0x69,0x64,0x78,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x3d,0x20,0x67,0x65,0x74,0x5f,0x67,0x6c,0x6f,0x62,0x61,0x6c,0x5f,0x69,0x64,0x28,0x31,0x29,0x3b,0xa,0x20,0x20,0x20,0x20,0x63,0x6f,0x6e,0x73,0x74,0x20,0x69,0x6e,0x74,0x20,0x6f,0x75,0x74,0x70,0x75,0x74,0x5f,0x62,0x61,0x74,0x63,0x68,0x5f,0x68,0x65,0x69,0x67,0x68,0x74,0x5f,0x62,0x6c,0x6f,0x63,0x6b,0x5f,0x69,0x64,0x78,0x20,0x3d,0x20,0x67,0x65,0x74,0x5f,0x67,0x6c,0x6f,0x62,0x61,0x6c,0x5f,0x69,0x64,0x28,0x32,0x29,0x3b,0xa,0xa,0x20,0x20,0x20,0x20,0x44,0x45,0x41,0x4c,0x5f,0x4e,0x4f,0x4e,0x5f,0x55,0x4e,0x49,0x46,0x4f,0x52,0x4d,0x5f,0x44,0x49,0x4d,0x33,0x28,0x6f,0x75,0x74,0x70,0x75,0x74,0x5f,0x63,0x68,0x61,0x6e,0x6e,0x65,0x6c,0x5f,0x62,0x6c,0x6f,0x63,0x6b,0x5f,0x69,0x64,0x78,0x2c,0x20,0x6f,0x75,0x74,0x70,0x75,0x74,0x5f,0x77,0x69,0x64,0x74,0x68,0x5f,0x62,0x6c,0x6f,0x63,0x6b,0x5f,0x69,0x64,0x78,0x2c,0x20,0x6f,0x75,0x74,0x70,0x75,0x74,0x5f,0x62,0x61,0x74,0x63,0x68,0x5f,0x68,0x65,0x69,0x67,0x68,0x74,0x5f,0x62,0x6c,0x6f,0x63,0x6b,0x5f,0x69,0x64,0x78,0x29,0x3b,0xa,0x20,0x20,0x20,0x20,0xa,0x20,0x20,0x20,0x20,0x63,0x6f,0x6e,0x73,0x74,0x20,0x69,0x6e,0x74,0x20,0x6f,0x75,0x74,0x70,0x75,0x74,0x5f,0x62,0x61,0x74,0x63,0x68,0x5f,0x69,0x64,0x78,0x20,0x20,0x3d,0x20,0x6f,0x75,0x74,0x70,0x75,0x74,0x5f,0x62,0x61,0x74,0x63,0x68,0x5f,0x68,0x65,0x69,0x67,0x68,0x74,0x5f,0x62,0x6c,0x6f,0x63,0x6b,0x5f,0x69,0x64,0x78,0x20,0x2f,0x20,0x6f,0x75,0x74,0x5f,0x68,0x65,0x69,0x67,0x68,0x74,0x3b,0xa,0x20,0x20,0x20,0x20,0x63,0x6f,0x6e,0x73,0x74,0x20,0x69,0x6e,0x74,0x20,0x6f,0x75,0x74,0x70,0x75,0x74,0x5f,0x68,0x65,0x69,0x67,0x68,0x74,0x5f,0x69,0x64,0x78,0x20,0x3d,0x20,0x6f,0x75,0x74,0x70,0x75,0x74,0x5f,0x62,0x61,0x74,0x63,0x68,0x5f,0x68,0x65,0x69,0x67,0x68,0x74,0x5f,0x62,0x6c,0x6f,0x63,0x6b,0x5f,0x69,0x64,0x78,0x20,0x25,0x20,0x6f,0x75,0x74,0x5f,0x68,0x65,0x69,0x67,0x68,0x74,0x3b,0xa,0xa,0x20,0x20,0x20,0x20,0x63,0x6f,0x6e,0x73,0x74,0x20,0x66,0x6c,0x6f,0x61,0x74,0x20,0x69,0x6e,0x5f,0x68,0x5f,0x69,0x64,0x78,0x20,0x3d,0x20,0x6f,0x75,0x74,0x70,0x75,0x74,0x5f,0x68,0x65,0x69,0x67,0x68,0x74,0x5f,0x69,0x64,0x78,0x20,0x2a,0x20,0x68,0x65,0x69,0x67,0x68,0x74,0x5f,0x73,0x63,0x61,0x6c,0x65,0x20,0x2b,0x20,0x68,0x65,0x69,0x67,0x68,0x74,0x5f,0x6f,0x66,0x66,0x73,0x65,0x74,0x3b,0xa,0x20,0x20,0x20,0x20,0x63,0x6f,0x6e,0x73,0x74,0x20,0x66,0x6c,0x6f,0x61,0x74,0x20,0x69,0x6e,0x5f,0x77,0x5f,0x69,0x64,0x78,0x20,0x3d,0x20,0x6f,0x75,0x74,0x70,0x75,0x74,0x5f,0x77,0x69,0x64,0x74,0x68,0x5f,0x62,0x6c,0x6f,0x63,0x6b,0x5f,0x69,0x64,0x78,0x20,0x2a,0x20,0x77,0x69,0x64,0x74,0x68,0x5f,0x73,0x63,0x61,0x6c,0x65,0x20,0x2b,0x20,0x77,0x69,0x64,0x74,0x68,0x5f,0x6f,0x66,0x66,0x73,0x65,0x74,0x3b,0xa,0x20,0x20,0x20,0x20,0x63,0x6f,0x6e,0x73,0x74,0x20,0x69,0x6e,0x74,0x20,0x69,0x6e,0x5f,0x68,0x30,0x5f,0x69,0x6e,0x64,0x65,0x78,0x20,0x20,0x20,0x20,0x20,0x20,0x3d,0x20,0x6d,0x69,0x6e,0x28,0x6d,0x61,0x78,0x28,0x30,0x2c,0x20,0x28,0x69,0x6e,0x74,0x29,0x66,0x6c,0x6f,0x6f,0x72,0x28,0x69,0x6e,0x5f,0x68,0x5f,0x69,0x64,0x78,0x29,0x29,0x2c,0x20,0x69,0x6e,0x70,0x75,0x74,0x5f,0x68,0x65,0x69,0x67,0x68,0x74,0x2d,0x31,0x29,0x3b,0xa,0x20,0x20,0x20,0x20,0x63,0x6f,0x6e,0x73,0x74,0x20,0x69,0x6e,0x74,0x20,0x69,0x6e,0x5f,0x77,0x30,0x5f,0x69,0x6e,0x64,0x65,0x78,0x20,0x20,0x20,0x20,0x20,0x20,0x3d,0x20,0x6d,0x69,0x6e,0x28,0x6d,0x61,0x78,0x28,0x30,0x2c,0x20,0x28,0x69,0x6e,0x74,0x29,0x66,0x6c,0x6f,0x6f,0x72,0x28,0x69,0x6e,0x5f,0x77,0x5f,0x69,0x64,0x78,0x29,0x29,0x2c,0x20,0x69,0x6e,0x70,0x75,0x74,0x5f,0x77,0x69,0x64,0x74,0x68,0x2d,0x31,0x29,0x3b,0xa,0x20,0x20,0x20,0x20,0x63,0x6f,0x6e,0x73,0x74,0x20,0x69,0x6e,0x74,0x20,0x69,0x6e,0x5f,0x68,0x31,0x5f,0x69,0x6e,0x64,0x65,0x78,0x20,0x20,0x20,0x20,0x20,0x20,0x3d,0x20,0x6d,0x69,0x6e,0x28,0x6d,0x61,0x78,0x28,0x30,0x2c,0x20,0x28,0x69,0x6e,0x74,0x29,0x66,0x6c,0x6f,0x6f,0x72,0x28,0x69,0x6e,0x5f,0x68,0x5f,0x69,0x64,0x78,0x29,0x2b,0x31,0x29,0x2c,0x20,0x69,0x6e,0x70,0x75,0x74,0x5f,0x68,0x65,0x69,0x67,0x68,0x74,0x2d,0x31,0x29,0x3b,0xa,0x20,0x20,0x20,0x20,0x63,0x6f,0x6e,0x73,0x74,0x20,0x69,0x6e,0x74,0x20,0x69,0x6e,0x5f,0x77,0x31,0x5f,0x69,0x6e,0x64,0x65,0x78,0x20,0x20,0x20,0x20,0x20,0x20,0x3d,0x20,0x6d,0x69,0x6e,0x28,0x6d,0x61,0x78,0x28,0x30,0x2c,0x20,0x28,0x69,0x6e,0x74,0x29,0x66,0x6c,0x6f,0x6f,0x72,0x28,0x69,0x6e,0x5f,0x77,0x5f,0x69,0x64,0x78,0x29,0x2b,0x31,0x29,0x2c,0x20,0x69,0x6e,0x70,0x75,0x74,0x5f,0x77,0x69,0x64,0x74,0x68,0x2d,0x31,0x29,0x3b,0xa,0x20,0x20,0x20,0x20,0xa,0x20,0x20,0x20,0x20,0x66,0x6c,0x6f,0x61,0x74,0x20,0x66,0x61,0x63,0x74,0x6f,0x72,0x5f,0x77,0x20,0x3d,0x20,0x28,0x69,0x6e,0x5f,0x77,0x5f,0x69,0x64,0x78,0x20,0x2d,0x20,0x28,0x69,0x6e,0x74,0x29,0x66,0x6c,0x6f,0x6f,0x72,0x28,0x69,0x6e,0x5f,0x77,0x5f,0x69,0x64,0x78,0x29,0x29,0x3b,0xa,0x20,0x20,0x20,0x20,0x66,0x6c,0x6f,0x61,0x74,0x20,0x66,0x61,0x63,0x74,0x6f,0x72,0x5f,0x68,0x20,0x3d,0x20,0x28,0x69,0x6e,0x5f,0x68,0x5f,0x69,0x64,0x78,0x20,0x2d,0x20,0x28,0x69,0x6e,0x74,0x29,0x66,0x6c,0x6f,0x6f,0x72,0x28,0x69,0x6e,0x5f,0x68,0x5f,0x69,0x64,0x78,0x29,0x29,0x3b,0xa,0x20,0x20,0x20,0x20,0xa,0x20,0x20,0x20,0x20,0x63,0x6f,0x6e,0x73,0x74,0x20,0x69,0x6e,0x74,0x20,0x69,0x6e,0x70,0x5f,0x6f,0x66,0x66,0x73,0x65,0x74,0x5f,0x62,0x61,0x73,0x65,0x20,0x3d,0x20,0x28,0x6f,0x75,0x74,0x70,0x75,0x74,0x5f,0x62,0x61,0x74,0x63,0x68,0x5f,0x69,0x64,0x78,0x20,0x2a,0x20,0x63,0x68,0x61,0x6e,0x6e,0x65,0x6c,0x42,0x6c,0x6f,0x63,0x6b,0x73,0x20,0x2b,0x20,0x6f,0x75,0x74,0x70,0x75,0x74,0x5f,0x63,0x68,0x61,0x6e,0x6e,0x65,0x6c,0x5f,0x62,0x6c,0x6f,0x63,0x6b,0x5f,0x69,0x64,0x78,0x29,0x20,0x2a,0x20,0x69,0x6e,0x70,0x75,0x74,0x5f,0x68,0x65,0x69,0x67,0x68,0x74,0x3b,0xa,0x20,0x20,0x20,0x20,0x63,0x6f,0x6e,0x73,0x74,0x20,0x69,0x6e,0x74,0x20,0x69,0x6e,0x70,0x5f,0x6f,0x66,0x66,0x73,0x65,0x74,0x5f,0x30,0x30,0x20,0x3d,0x20,0x28,0x69,0x6e,0x70,0x5f,0x6f,0x66,0x66,0x73,0x65,0x74,0x5f,0x62,0x61,0x73,0x65,0x20,0x2b,0x20,0x69,0x6e,0x5f,0x68,0x30,0x5f,0x69,0x6e,0x64,0x65,0x78,0x29,0x20,0x2a,0x20,0x69,0x6e,0x70,0x75,0x74,0x5f,0x77,0x69,0x64,0x74,0x68,0x20,0x2b,0x20,0x69,0x6e,0x5f,0x77,0x30,0x5f,0x69,0x6e,0x64,0x65,0x78,0x3b,0xa,0x20,0x20,0x20,0x20,0x63,0x6f,0x6e,0x73,0x74,0x20,0x69,0x6e,0x74,0x20,0x69,0x6e,0x70,0x5f,0x6f,0x66,0x66,0x73,0x65,0x74,0x5f,0x30,0x31,0x20,0x3d,0x20,0x28,0x69,0x6e,0x70,0x5f,0x6f,0x66,0x66,0x73,0x65,0x74,0x5f,0x62,0x61,0x73,0x65,0x20,0x2b,0x20,0x69,0x6e,0x5f,0x68,0x30,0x5f,0x69,0x6e,0x64,0x65,0x78,0x29,0x20,0x2a,0x20,0x69,0x6e,0x70,0x75,0x74,0x5f,0x77,0x69,0x64,0x74,0x68,0x20,0x2b,0x20,0x69,0x6e,0x5f,0x77,0x31,0x5f,0x69,0x6e,0x64,0x65,0x78,0x3b,0xa,0x20,0x20,0x20,0x20,0x63,0x6f,0x6e,0x73,0x74,0x20,0x69,0x6e,0x74,0x20,0x69,0x6e,0x70,0x5f,0x6f,0x66,0x66,0x73,0x65,0x74,0x5f,0x31,0x30,0x20,0x3d,0x20,0x28,0x69,0x6e,0x70,0x5f,0x6f,0x66,0x66,0x73,0x65,0x74,0x5f,0x62,0x61,0x73,0x65,0x20,0x2b,0x20,0x69,0x6e,0x5f,0x68,0x31,0x5f,0x69,0x6e,0x64,0x65,0x78,0x29,0x20,0x2a,0x20,0x69,0x6e,0x70,0x75,0x74,0x5f,0x77,0x69,0x64,0x74,0x68,0x20,0x2b,0x20,0x69,0x6e,0x5f,0x77,0x30,0x5f,0x69,0x6e,0x64,0x65,0x78,0x3b,0xa,0x20,0x20,0x20,0x20,0x63,0x6f,0x6e,0x73,0x74,0x20,0x69,0x6e,0x74,0x20,0x69,0x6e,0x70,0x5f,0x6f,0x66,0x66,0x73,0x65,0x74,0x5f,0x31,0x31,0x20,0x3d,0x20,0x28,0x69,0x6e,0x70,0x5f,0x6f,0x66,0x66,0x73,0x65,0x74,0x5f,0x62,0x61,0x73,0x65,0x20,0x2b,0x20,0x69,0x6e,0x5f,0x68,0x31,0x5f,0x69,0x6e,0x64,0x65,0x78,0x29,0x20,0x2a,0x20,0x69,0x6e,0x70,0x75,0x74,0x5f,0x77,0x69,0x64,0x74,0x68,0x20,0x2b,0x20,0x69,0x6e,0x5f,0x77,0x31,0x5f,0x69,0x6e,0x64,0x65,0x78,0x3b,0xa,0xa,0x20,0x20,0x20,0x20,0x46,0x4c,0x4f,0x41,0x54,0x34,0x20,0x76,0x61,0x6c,0x75,0x65,0x5f,0x30,0x30,0x20,0x3d,0x20,0x76,0x6c,0x6f,0x61,0x64,0x34,0x28,0x69,0x6e,0x70,0x5f,0x6f,0x66,0x66,0x73,0x65,0x74,0x5f,0x30,0x30,0x2c,0x20,0x69,0x6e,0x70,0x75,0x74,0x29,0x3b,0xa,0x20,0x20,0x20,0x20,0x46,0x4c,0x4f,0x41,0x54,0x34,0x20,0x76,0x61,0x6c,0x75,0x65,0x5f,0x30,0x31,0x20,0x3d,0x20,0x76,0x6c,0x6f,0x61,0x64,0x34,0x28,0x69,0x6e,0x70,0x5f,0x6f,0x66,0x66,0x73,0x65,0x74,0x5f,0x30,0x31,0x2c,0x20,0x69,0x6e,0x70,0x75,0x74,0x29,0x3b,0xa,0x20,0x20,0x20,0x20,0x46,0x4c,0x4f,0x41,0x54,0x34,0x20,0x76,0x61,0x6c,0x75,0x65,0x5f,0x31,0x30,0x20,0x3d,0x20,0x76,0x6c,0x6f,0x61,0x64,0x34,0x28,0x69,0x6e,0x70,0x5f,0x6f,0x66,0x66,0x73,0x65,0x74,0x5f,0x31,0x30,0x2c,0x20,0x69,0x6e,0x70,0x75,0x74,0x29,0x3b,0xa,0x20,0x20,0x20,0x20,0x46,0x4c,0x4f,0x41,0x54,0x34,0x20,0x76,0x61,0x6c,0x75,0x65,0x5f,0x31,0x31,0x20,0x3d,0x20,0x76,0x6c,0x6f,0x61,0x64,0x34,0x28,0x69,0x6e,0x70,0x5f,0x6f,0x66,0x66,0x73,0x65,0x74,0x5f,0x31,0x31,0x2c,0x20,0x69,0x6e,0x70,0x75,0x74,0x29,0x3b,0xa,0xa,0x20,0x20,0x20,0x20,0x46,0x4c,0x4f,0x41,0x54,0x34,0x20,0x76,0x61,0x6c,0x75,0x65,0x20,0x3d,0x20,0x43,0x4f,0x4e,0x56,0x45,0x52,0x54,0x5f,0x46,0x4c,0x4f,0x41,0x54,0x34,0x28,0x28,0x66,0x6c,0x6f,0x61,0x74,0x34,0x29,0x28,0x28,0x31,0x2e,0x30,0x2d,0x66,0x61,0x63,0x74,0x6f,0x72,0x5f,0x77,0x29,0x2a,0x28,0x31,0x2e,0x30,0x2d,0x66,0x61,0x63,0x74,0x6f,0x72,0x5f,0x68,0x29,0x29,0x2a,0x63,0x6f,0x6e,0x76,0x65,0x72,0x74,0x5f,0x66,0x6c,0x6f,0x61,0x74,0x34,0x28,0x76,0x61,0x6c,0x75,0x65,0x5f,0x30,0x30,0x29,0x20,0x2b,0x20,0x28,0x66,0x6c,0x6f,0x61,0x74,0x34,0x29,0x28,0x66,0x61,0x63,0x74,0x6f,0x72,0x5f,0x77,0x2a,0x28,0x31,0x2e,0x30,0x2d,0x66,0x61,0x63,0x74,0x6f,0x72,0x5f,0x68,0x29,0x29,0x2a,0x63,0x6f,0x6e,0x76,0x65,0x72,0x74,0x5f,0x66,0x6c,0x6f,0x61,0x74,0x34,0x28,0x76,0x61,0x6c,0x75,0x65,0x5f,0x30,0x31,0x29,0x20,0x2b,0x20,0x28,0x66,0x6c,0x6f,0x61,0x74,0x34,0x29,0x28,0x28,0x31,0x2e,0x30,0x2d,0x66,0x61,0x63,0x74,0x6f,0x72,0x5f,0x77,0x29,0x2a,0x66,0x61,0x63,0x74,0x6f,0x72,0x5f,0x68,0x29,0x2a,0x63,0x6f,0x6e,0x76,0x65,0x72,0x74,0x5f,0x66,0x6c,0x6f,0x61,0x74,0x34,0x28,0x76,0x61,0x6c,0x75,0x65,0x5f,0x31,0x30,0x29,0x20,0x2b,0x20,0x28,0x66,0x6c,0x6f,0x61,0x74,0x34,0x29,0x28,0x66,0x61,0x63,0x74,0x6f,0x72,0x5f,0x77,0x2a,0x66,0x61,0x63,0x74,0x6f,0x72,0x5f,0x68,0x29,0x2a,0x63,0x6f,0x6e,0x76,0x65,0x72,0x74,0x5f,0x66,0x6c,0x6f,0x61,0x74,0x34,0x28,0x76,0x61,0x6c,0x75,0x65,0x5f,0x31,0x31,0x29,0x29,0x3b,0xa,0x20,0x20,0x20,0x20,0xa,0x20,0x20,0x20,0x20,0x63,0x6f,0x6e,0x73,0x74,0x20,0x69,0x6e,0x74,0x20,0x6f,0x75,0x74,0x5f,0x6f,0x66,0x66,0x73,0x65,0x74,0x20,0x3d,0x20,0x28,0x28,0x6f,0x75,0x74,0x70,0x75,0x74,0x5f,0x62,0x61,0x74,0x63,0x68,0x5f,0x69,0x64,0x78,0x20,0x2a,0x20,0x63,0x68,0x61,0x6e,0x6e,0x65,0x6c,0x42,0x6c,0x6f,0x63,0x6b,0x73,0x20,0x2b,0x20,0x6f,0x75,0x74,0x70,0x75,0x74,0x5f,0x63,0x68,0x61,0x6e,0x6e,0x65,0x6c,0x5f,0x62,0x6c,0x6f,0x63,0x6b,0x5f,0x69,0x64,0x78,0x29,0x20,0x2a,0x20,0x6f,0x75,0x74,0x5f,0x68,0x65,0x69,0x67,0x68,0x74,0x20,0x2b,0x20,0x6f,0x75,0x74,0x70,0x75,0x74,0x5f,0x68,0x65,0x69,0x67,0x68,0x74,0x5f,0x69,0x64,0x78,0x29,0x20,0x2a,0x20,0x6f,0x75,0x74,0x5f,0x77,0x69,0x64,0x74,0x68,0x20,0x2b,0x20,0x6f,0x75,0x74,0x70,0x75,0x74,0x5f,0x77,0x69,0x64,0x74,0x68,0x5f,0x62,0x6c,0x6f,0x63,0x6b,0x5f,0x69,0x64,0x78,0x3b,0xa,0x20,0x20,0x20,0x20,0xa,0x20,0x20,0x20,0x20,0x76,0x73,0x74,0x6f,0x72,0x65,0x34,0x28,0x76,0x61,0x6c,0x75,0x65,0x2c,0x20,0x6f,0x75,0x74,0x5f,0x6f,0x66,0x66,0x73,0x65,0x74,0x2c,0x20,0x6f,0x75,0x74,0x70,0x75,0x74,0x29,0x3b,0xa,0x7d,0xa,0xa,0x5f,0x5f,0x6b,0x65,0x72,0x6e,0x65,0x6c,0x20,0x76,0x6f,0x69,0x64,0x20,0x6e,0x65,0x61,0x72,0x65,0x73,0x74,0x33,0x44,0x5f,0x62,0x75,0x66,0x28,0x47,0x4c,0x4f,0x42,0x41,0x4c,0x5f,0x53,0x49,0x5a,0x45,0x5f,0x33,0x5f,0x44,0x49,0x4d,0x53,0x20,0x5f,0x5f,0x67,0x6c,0x6f,0x62,0x61,0x6c,0x20,0x63,0x6f,0x6e,0x73,0x74,0x20,0x46,0x4c,0x4f,0x41,0x54,0x2a,0x20,0x69,0x6e,0x70,0x75,0x74,0x2c,0xa,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x5f,0x5f,0x67,0x6c,0x6f,0x62,0x61,0x6c,0x20,0x46,0x4c,0x4f,0x41,0x54,0x2a,0x20,0x6f,0x75,0x74,0x70,0x75,0x74,0x2c,0xa,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x5f,0x5f,0x70,0x72,0x69,0x76,0x61,0x74,0x65,0x20,0x63,0x6f,0x6e,0x73,0x74,0x20,0x66,0x6c,0x6f,0x61,0x74,0x20,0x64,0x65,0x70,0x74,0x68,0x5f,0x73,0x63,0x61,0x6c,0x65,0x2c,0xa,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x5f,0x5f,0x70,0x72,0x69,0x76,0x61,0x74,0x65,0x20,0x63,0x6f,0x6e,0x73,0x74,0x20,0x66,0x6c,0x6f,0x61,0x74,0x20,0x68,0x65,0x69,0x67,0x68,0x74,0x5f,0x73,0x63,0x61,0x6c,0x65,0x2c,0xa,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x5f,0x5f,0x70,0x72,0x69,0x76,0x61,0x74,0x65,0x20,0x63,0x6f,0x6e,0x73,0x74,0x20,0x66,0x6c,0x6f,0x61,0x74,0x20,0x77,0x69,0x64,0x74,0x68,0x5f,0x73,0x63,0x61,0x6c,0x65,0x2c,0xa,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x5f,0x5f,0x70,0x72,0x69,0x76,0x61,0x74,0x65,0x20,0x63,0x6f,0x6e,0x73,0x74,0x20,0x66,0x6c,0x6f,0x61,0x74,0x20,0x64,0x65,0x70,0x74,0x68,0x5f,0x6f,0x66,0x66,0x73,0x65,0x74,0x2c,0xa,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x5f,0x5f,0x70,0x72,0x69,0x76,0x61,0x74,0x65,0x20,0x63,0x6f,0x6e,0x73,0x74,0x20,0x66,0x6c,0x6f,0x61,0x74,0x20,0x68,0x65,0x69,0x67,0x68,0x74,0x5f,0x6f,0x66,0x66,0x73,0x65,0x74,0x2c,0xa,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x5f,0x5f,0x70,0x72,0x69,0x76,0x61,0x74,0x65,0x20,0x63,0x6f,0x6e,0x73,0x74,0x20,0x66,0x6c,0x6f,0x61,0x74,0x20,0x77,0x69,0x64,0x74,0x68,0x5f,0x6f,0x66,0x66,0x73,0x65,0x74,0x2c,0xa,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x5f,0x5f,0x70,0x72,0x69,0x76,0x61,0x74,0x65,0x20,0x63,0x6f,0x6e,0x73,0x74,0x20,0x69,0x6e,0x74,0x20,0x69,0x6e,0x70,0x75,0x74,0x5f,0x64,0x65,0x70,0x74,0x68,0x2c,0xa,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x5f,0x5f,0x70,0x72,0x69,0x76,0x61,0x74,0x65,0x20,0x63,0x6f,0x6e,0x73,0x74,0x20,0x69,0x6e,0x74,0x20,0x69,0x6e,0x70,0x75,0x74,0x5f,0x68,0x65,0x69,0x67,0x68,0x74,0x2c,0xa,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x5f,0x5f,0x70,0x72,0x69,0x76,0x61,0x74,0x65,0x20,0x63,0x6f,0x6e,0x73,0x74,0x20,0x69,0x6e,0x74,0x20,0x69,0x6e,0x70,0x75,0x74,0x5f,0x77,0x69,0x64,0x74,0x68,0x2c,0xa,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x5f,0x5f,0x70,0x72,0x69,0x76,0x61,0x74,0x65,0x20,0x63,0x6f,0x6e,0x73,0x74,0x20,0x69,0x6e,0x74,0x20,0x6f,0x75,0x74,0x5f,0x64,0x65,0x70,0x74,0x68,0x2c,0xa,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x5f,0x5f,0x70,0x72,0x69,0x76,0x61,0x74,0x65,0x20,0x63,0x6f,0x6e,0x73,0x74,0x20,0x69,0x6e,0x74,0x20,0x6f,0x75,0x74,0x5f,0x68,0x65,0x69,0x67,0x68,0x74,0x2c,0xa,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x5f,0x5f,0x70,0x72,0x69,0x76,0x61,0x74,0x65,0x20,0x63,0x6f,0x6e,0x73,0x74,0x20,0x69,0x6e,0x74,0x20,0x6f,0x75,0x74,0x5f,0x77,0x69,0x64,0x74,0x68,0x2c,0xa,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x5f,0x5f,0x70,0x72,0x69,0x76,0x61,0x74,0x65,0x20,0x63,0x6f,0x6e,0x73,0x74,0x20,0x69,0x6e,0x74,0x20,0x63,0x68,0x61,0x6e,0x6e,0x65,0x6c,0x42,0x6c,0x6f,0x63,0x6b,0x73,0x29,0x20,0x7b,0xa,0x20,0x20,0x20,0x20,0x63,0x6f,0x6e,0x73,0x74,0x20,0x69,0x6e,0x74,0x20,0x6f,0x75,0x74,0x70,0x75,0x74,0x5f,0x63,0x68,0x61,0x6e,0x6e,0x65,0x6c,0x5f,0x62,0x6c,0x6f,0x63,0x6b,0x5f,0x69,0x64,0x78,0x20,0x20,0x20,0x20,0x20,0x20,0x3d,0x20,0x67,0x65,0x74,0x5f,0x67,0x6c,0x6f,0x62,0x61,0x6c,0x5f,0x69,0x64,0x28,0x30,0x29,0x3b,0xa,0x20,0x20,0x20,0x20,0x63,0x6f,0x6e,0x73,0x74,0x20,0x69,0x6e,0x74,0x20,0x6f,0x75,0x74,0x70,0x75,0x74,0x5f,0x68,0x65,0x69,0x67,0x68,0x74,0x5f,0x77,0x69,0x64,0x74,0x68,0x5f,0x62,0x6c,0x6f,0x63,0x6b,0x5f,0x69,0x64,0x78,0x20,0x3d,0x20,0x67,0x65,0x74,0x5f,0x67,0x6c,0x6f,0x62,0x61,0x6c,0x5f,0x69,0x64,0x28,0x31,0x29,0x3b,0xa,0x20,0x20,0x20,0x20,0x63,0x6f,0x6e,0x73,0x74,0x20,0x69,0x6e,0x74,0x20,0x6f,0x75,0x74,0x70,0x75,0x74,0x5f,0x62,0x61,0x74,0x63,0x68,0x5f,0x64,0x65,0x70,0x74,0x68,0x5f,0x62,0x6c,0x6f,0x63,0x6b,0x5f,0x69,0x64,0x78,0x20,0x20,0x3d,0x20,0x67,0x65,0x74,0x5f,0x67,0x6c,0x6f,0x62,0x61,0x6c,0x5f,0x69,0x64,0x28,0x32,0x29,0x3b,0xa,0xa,0x20,0x20,0x20,0x20,0x44,0x45,0x41,0x4c,0x5f,0x4e,0x4f,0x4e,0x5f,0x55,0x4e,0x49,0x46,0x4f,0x52,0x4d,0x5f,0x44,0x49,0x4d,0x33,0x28,0x6f,0x75,0x74,0x70,0x75,0x74,0x5f,0x63,0x68,0x61,0x6e,0x6e,0x65,0x6c,0x5f,0x62,0x6c,0x6f,0x63,0x6b,0x5f,0x69,0x64,0x78,0x2c,0x20,0x6f,0x75,0x74,0x70,0x75,0x74,0x5f,0x68,0x65,0x69,0x67,0x68,0x74,0x5f,0x77,0x69,0x64,0x74,0x68,0x5f,0x62,0x6c,0x6f,0x63,0x6b,0x5f,0x69,0x64,0x78,0x2c,0x20,0x6f,0x75,0x74,0x70,0x75,0x74,0x5f,0x62,0x61,0x74,0x63,0x68,0x5f,0x64,0x65,0x70,0x74,0x68,0x5f,0x62,0x6c,0x6f,0x63,0x6b,0x5f,0x69,0x64,0x78,0x29,0x3b,0xa,0xa,0xa,0x20,0x20,0x20,0x20,0x63,0x6f,0x6e,0x73,0x74,0x20,0x69,0x6e,0x74,0x20,0x6f,0x75,0x74,0x70,0x75,0x74,0x5f,0x62,0x61,0x74,0x63,0x68,0x5f,0x69,0x64,0x78,0x20,0x20,0x3d,0x20,0x6f,0x75,0x74,0x70,0x75,0x74,0x5f,0x62,0x61,0x74,0x63,0x68,0x5f,0x64,0x65,0x70,0x74,0x68,0x5f,0x62,0x6c,0x6f,0x63,0x6b,0x5f,0x69,0x64,0x78,0x20,0x2f,0x20,0x6f,0x75,0x74,0x5f,0x64,0x65,0x70,0x74,0x68,0x3b,0xa,0x20,0x20,0x20,0x20,0x63,0x6f,0x6e,0x73,0x74,0x20,0x69,0x6e,0x74,0x20,0x6f,0x75,0x74,0x70,0x75,0x74,0x5f,0x64,0x65,0x70,0x74,0x68,0x5f,0x69,0x64,0x78,0x20,0x20,0x3d,0x20,0x6f,0x75,0x74,0x70,0x75,0x74,0x5f,0x62,0x61,0x74,0x63,0x68,0x5f,0x64,0x65,0x70,0x74,0x68,0x5f,0x62,0x6c,0x6f,0x63,0x6b,0x5f,0x69,0x64,0x78,0x20,0x25,0x20,0x6f,0x75,0x74,0x5f,0x64,0x65,0x70,0x74,0x68,0x3b,0xa,0x20,0x20,0x20,0x20,0x63,0x6f,0x6e,0x73,0x74,0x20,0x69,0x6e,0x74,0x20,0x6f,0x75,0x74,0x70,0x75,0x74,0x5f,0x68,0x65,0x69,0x67,0x68,0x74,0x5f,0x69,0x64,0x78,0x20,0x3d,0x20,0x6f,0x75,0x74,0x70,0x75,0x74,0x5f,0x68,0x65,0x69,0x67,0x68,0x74,0x5f,0x77,0x69,0x64,0x74,0x68,0x5f,0x62,0x6c,0x6f,0x63,0x6b,0x5f,0x69,0x64,0x78,0x20,0x2f,0x20,0x6f,0x75,0x74,0x5f,0x68,0x65,0x69,0x67,0x68,0x74,0x3b,0xa,0x20,0x20,0x20,0x20,0x63,0x6f,0x6e,0x73,0x74,0x20,0x69,0x6e,0x74,0x20,0x6f,0x75,0x74,0x70,0x75,0x74,0x5f,0x77,0x69,0x64,0x74,0x68,0x5f,0x69,0x64,0x78,0x20,0x20,0x3d,0x20,0x6f,0x75,0x74,0x70,0x75,0x74,0x5f,0x68,0x65,0x69,0x67,0x68,0x74,0x5f,0x77,0x69,0x64,0x74,0x68,0x5f,0x62,0x6c,0x6f,0x63,0x6b,0x5f,0x69,0x64,0x78,0x20,0x25,0x20,0x6f,0x75,0x74,0x5f,0x68,0x65,0x69,0x67,0x68,0x74,0x3b,0xa,0xa,0x20,0x20,0x20,0x20,0x63,0x6f,0x6e,0x73,0x74,0x20,0x66,0x6c,0x6f,0x61,0x74,0x20,0x69,0x6e,0x5f,0x64,0x5f,0x69,0x64,0x78,0x20,0x3d,0x20,0x6f,0x75,0x74,0x70,0x75,0x74,0x5f,0x64,0x65,0x70,0x74,0x68,0x5f,0x69,0x64,0x78,0x20,0x2a,0x20,0x64,0x65,0x70,0x74,0x68,0x5f,0x73,0x63,0x61,0x6c,0x65,0x20,0x2b,0x20,0x64,0x65,0x70,0x74,0x68,0x5f,0x6f,0x66,0x66,0x73,0x65,0x74,0x3b,0xa,0x20,0x20,0x20,0x20,0x63,0x6f,0x6e,0x73,0x74,0x20,0x66,0x6c,0x6f,0x61,0x74,0x20,0x69,0x6e,0x5f,0x68,0x5f,0x69,0x64,0x78,0x20,0x3d,0x20,0x6f,0x75,0x74,0x70,0x75,0x74,0x5f,0x68,0x65,0x69,0x67,0x68,0x74,0x5f,0x69,0x64,0x78,0x20,0x2a,0x20,0x68,0x65,0x69,0x67,0x68,0x74,0x5f,0x73,0x63,0x61,0x6c,0x65,0x20,0x2b,0x20,0x68,0x65,0x69,0x67,0x68,0x74,0x5f,0x6f,0x66,0x66,0x73,0x65,0x74,0x3b,0xa,0x20,0x20,0x20,0x20,0x63,0x6f,0x6e,0x73,0x74,0x20,0x66,0x6c,0x6f,0x61,0x74,0x20,0x69,0x6e,0x5f,0x77,0x5f,0x69,0x64,0x78,0x20,0x3d,0x20,0x6f,0x75,0x74,0x70,0x75,0x74,0x5f,0x77,0x69,0x64,0x74,0x68,0x5f,0x69,0x64,0x78,0x20,0x2a,0x20,0x77,0x69,0x64,0x74,0x68,0x5f,0x73,0x63,0x61,0x6c,0x65,0x20,0x2b,0x20,0x77,0x69,0x64,0x74,0x68,0x5f,0x6f,0x66,0x66,0x73,0x65,0x74,0x3b,0xa,0x20,0x20,0x20,0x20,0x63,0x6f,0x6e,0x73,0x74,0x20,0x69,0x6e,0x74,0x20,0x69,0x6e,0x5f,0x64,0x5f,0x69,0x6e,0x64,0x65,0x78,0x20,0x20,0x20,0x20,0x20,0x20,0x3d,0x20,0x6d,0x69,0x6e,0x28,0x6d,0x61,0x78,0x28,0x30,0x2c,0x20,0x28,0x69,0x6e,0x74,0x29,0x66,0x6c,0x6f,0x6f,0x72,0x28,0x69,0x6e,0x5f,0x64,0x5f,0x69,0x64,0x78,0x29,0x29,0x2c,0x20,0x69,0x6e,0x70,0x75,0x74,0x5f,0x64,0x65,0x70,0x74,0x68,0x2d,0x31,0x29,0x3b,0xa,0x20,0x20,0x20,0x20,0x63,0x6f,0x6e,0x73,0x74,0x20,0x69,0x6e,0x74,0x20,0x69,0x6e,0x5f,0x68,0x5f,0x69,0x6e,0x64,0x65,0x78,0x20,0x20,0x20,0x20,0x20,0x20,0x3d,0x20,0x6d,0x69,0x6e,0x28,0x6d,0x61,0x78,0x28,0x30,0x2c,0x20,0x28,0x69,0x6e,0x74,0x29,0x66,0x6c,0x6f,0x6f,0x72,0x28,0x69,0x6e,0x5f,0x68,0x5f,0x69,0x64,0x78,0x29,0x29,0x2c,0x20,0x69,0x6e,0x70,0x75,0x74,0x5f,0x68,0x65,0x69,0x67,0x68,0x74,0x2d,0x31,0x29,0x3b,0xa,0x20,0x20,0x20,0x20,0x63,0x6f,0x6e,0x73,0x74,0x20,0x69,0x6e,0x74,0x20,0x69,0x6e,0x5f,0x77,0x5f,0x69,0x6e,0x64,0x65,0x78,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x3d,0x20,0x6d,0x69,0x6e,0x28,0x6d,0x61,0x78,0x28,0x30,0x2c,0x20,0x28,0x69,0x6e,0x74,0x29,0x66,0x6c,0x6f,0x6f,0x72,0x28,0x69,0x6e,0x5f,0x77,0x5f,0x69,0x64,0x78,0x29,0x29,0x2c,0x20,0x69,0x6e,0x70,0x75,0x74,0x5f,0x77,0x69,0x64,0x74,0x68,0x2d,0x31,0x29,0x3b,0xa,0xa,0x20,0x20,0x20,0x20,0x63,0x6f,0x6e,0x73,0x74,0x20,0x69,0x6e,0x74,0x20,0x69,0x6e,0x70,0x5f,0x6f,0x66,0x66,0x73,0x65,0x74,0x20,0x3d,0x20,0x28,0x28,0x28,0x6f,0x75,0x74,0x70,0x75,0x74,0x5f,0x62,0x61,0x74,0x63,0x68,0x5f,0x69,0x64,0x78,0x20,0x2a,0x20,0x63,0x68,0x61,0x6e,0x6e,0x65,0x6c,0x42,0x6c,0x6f,0x63,0x6b,0x73,0x20,0x2b,0x20,0x6f,0x75,0x74,0x70,0x75,0x74,0x5f,0x63,0x68,0x61,0x6e,0x6e,0x65,0x6c,0x5f,0x62,0x6c,0x6f,0x63,0x6b,0x5f,0x69,0x64,0x78,0x29,0xa,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x2a,0x20,0x69,0x6e,0x70,0x75,0x74,0x5f,0x64,0x65,0x70,0x74,0x68,0x20,0x2b,0x20,0x69,0x6e,0x5f,0x64,0x5f,0x69,0x6e,0x64,0x65,0x78,0x29,0x20,0x2a,0x20,0x69,0x6e,0x70,0x75,0x74,0x5f,0x68,0x65,0x69,0x67,0x68,0x74,0x20,0x2b,0x20,0x69,0x6e,0x5f,0x68,0x5f,0x69,0x6e,0x64,0x65,0x78,0x29,0x20,0x2a,0x20,0x69,0x6e,0x70,0x75,0x74,0x5f,0x77,0x69,0x64,0x74,0x68,0x20,0x2b,0x20,0x69,0x6e,0x5f,0x77,0x5f,0x69,0x6e,0x64,0x65,0x78,0x3b,0xa,0xa,0x20,0x20,0x20,0x20,0x63,0x6f,0x6e,0x73,0x74,0x20,0x69,0x6e,0x74,0x20,0x6f,0x75,0x74,0x5f,0x6f,0x66,0x66,0x73,0x65,0x74,0x20,0x3d,0x20,0x28,0x28,0x28,0x6f,0x75,0x74,0x70,0x75,0x74,0x5f,0x62,0x61,0x74,0x63,0x68,0x5f,0x69,0x64,0x78,0x20,0x2a,0x20,0x63,0x68,0x61,0x6e,0x6e,0x65,0x6c,0x42,0x6c,0x6f,0x63,0x6b,0x73,0x20,0x2b,0x20,0x6f,0x75,0x74,0x70,0x75,0x74,0x5f,0x63,0x68,0x61,0x6e,0x6e,0x65,0x6c,0x5f,0x62,0x6c,0x6f,0x63,0x6b,0x5f,0x69,0x64,0x78,0x29,0xa,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x2a,0x20,0x6f,0x75,0x74,0x5f,0x64,0x65,0x70,0x74,0x68,0x20,0x2b,0x20,0x6f,0x75,0x74,0x70,0x75,0x74,0x5f,0x64,0x65,0x70,0x74,0x68,0x5f,0x69,0x64,0x78,0x29,0x20,0x2a,0x20,0x6f,0x75,0x74,0x5f,0x68,0x65,0x69,0x67,0x68,0x74,0x20,0x2b,0x20,0x6f,0x75,0x74,0x70,0x75,0x74,0x5f,0x68,0x65,0x69,0x67,0x68,0x74,0x5f,0x69,0x64,0x78,0x29,0x20,0x2a,0x20,0x6f,0x75,0x74,0x5f,0x77,0x69,0x64,0x74,0x68,0x20,0x2b,0x20,0x6f,0x75,0x74,0x70,0x75,0x74,0x5f,0x77,0x69,0x64,0x74,0x68,0x5f,0x69,0x64,0x78,0x3b,0xa,0x20,0x20,0x20,0x20,0x46,0x4c,0x4f,0x41,0x54,0x34,0x20,0x76,0x61,0x6c,0x75,0x65,0x20,0x3d,0x20,0x76,0x6c,0x6f,0x61,0x64,0x34,0x28,0x69,0x6e,0x70,0x5f,0x6f,0x66,0x66,0x73,0x65,0x74,0x2c,0x20,0x69,0x6e,0x70,0x75,0x74,0x29,0x3b,0xa,0x20,0x20,0x20,0x20,0x76,0x73,0x74,0x6f,0x72,0x65,0x34,0x28,0x76,0x61,0x6c,0x75,0x65,0x2c,0x20,0x6f,0x75,0x74,0x5f,0x6f,0x66,0x66,0x73,0x65,0x74,0x2c,0x20,0x6f,0x75,0x74,0x70,0x75,0x74,0x29,0x3b,0xa,0x7d, } }, #endif +{ + "loop", + { 0x23,0x69,0x66,0x64,0x65,0x66,0x20,0x4d,0x4e,0x4e,0x5f,0x53,0x55,0x50,0x50,0x4f,0x52,0x54,0x5f,0x46,0x50,0x31,0x36,0xa,0x23,0x70,0x72,0x61,0x67,0x6d,0x61,0x20,0x4f,0x50,0x45,0x4e,0x43,0x4c,0x20,0x45,0x58,0x54,0x45,0x4e,0x53,0x49,0x4f,0x4e,0x20,0x63,0x6c,0x5f,0x6b,0x68,0x72,0x5f,0x66,0x70,0x31,0x36,0x20,0x3a,0x20,0x65,0x6e,0x61,0x62,0x6c,0x65,0xa,0x23,0x65,0x6e,0x64,0x69,0x66,0xa,0xa,0x5f,0x5f,0x63,0x6f,0x6e,0x73,0x74,0x61,0x6e,0x74,0x20,0x73,0x61,0x6d,0x70,0x6c,0x65,0x72,0x5f,0x74,0x20,0x53,0x41,0x4d,0x50,0x4c,0x45,0x52,0x20,0x3d,0x20,0x43,0x4c,0x4b,0x5f,0x4e,0x4f,0x52,0x4d,0x41,0x4c,0x49,0x5a,0x45,0x44,0x5f,0x43,0x4f,0x4f,0x52,0x44,0x53,0x5f,0x46,0x41,0x4c,0x53,0x45,0x20,0x7c,0x20,0x43,0x4c,0x4b,0x5f,0x41,0x44,0x44,0x52,0x45,0x53,0x53,0x5f,0x43,0x4c,0x41,0x4d,0x50,0x20,0x7c,0x20,0x43,0x4c,0x4b,0x5f,0x46,0x49,0x4c,0x54,0x45,0x52,0x5f,0x4e,0x45,0x41,0x52,0x45,0x53,0x54,0x3b,0xa,0xa,0x5f,0x5f,0x6b,0x65,0x72,0x6e,0x65,0x6c,0x20,0x76,0x6f,0x69,0x64,0x20,0x62,0x61,0x74,0x63,0x68,0x5f,0x6d,0x61,0x74,0x6d,0x75,0x6c,0x28,0x5f,0x5f,0x70,0x72,0x69,0x76,0x61,0x74,0x65,0x20,0x69,0x6e,0x74,0x20,0x67,0x6c,0x6f,0x62,0x61,0x6c,0x5f,0x64,0x69,0x6d,0x30,0x2c,0x20,0x5f,0x5f,0x70,0x72,0x69,0x76,0x61,0x74,0x65,0x20,0x69,0x6e,0x74,0x20,0x67,0x6c,0x6f,0x62,0x61,0x6c,0x5f,0x64,0x69,0x6d,0x31,0x2c,0x20,0x5f,0x5f,0x70,0x72,0x69,0x76,0x61,0x74,0x65,0x20,0x69,0x6e,0x74,0x20,0x67,0x6c,0x6f,0x62,0x61,0x6c,0x5f,0x64,0x69,0x6d,0x32,0x2c,0xa,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x5f,0x5f,0x67,0x6c,0x6f,0x62,0x61,0x6c,0x20,0x46,0x4c,0x4f,0x41,0x54,0x2a,0x20,0x6f,0x75,0x74,0x70,0x75,0x74,0x2c,0x20,0x5f,0x5f,0x67,0x6c,0x6f,0x62,0x61,0x6c,0x20,0x46,0x4c,0x4f,0x41,0x54,0x2a,0x20,0x69,0x6e,0x70,0x75,0x74,0x5f,0x41,0x2c,0x20,0x5f,0x5f,0x67,0x6c,0x6f,0x62,0x61,0x6c,0x20,0x46,0x4c,0x4f,0x41,0x54,0x2a,0x20,0x69,0x6e,0x70,0x75,0x74,0x5f,0x42,0x2c,0xa,0x23,0x69,0x66,0x64,0x65,0x66,0x20,0x42,0x49,0x41,0x53,0xa,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x5f,0x5f,0x67,0x6c,0x6f,0x62,0x61,0x6c,0x20,0x46,0x4c,0x4f,0x41,0x54,0x2a,0x20,0x69,0x6e,0x70,0x75,0x74,0x5f,0x43,0x2c,0xa,0x23,0x65,0x6e,0x64,0x69,0x66,0xa,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x5f,0x5f,0x67,0x6c,0x6f,0x62,0x61,0x6c,0x20,0x46,0x4c,0x4f,0x41,0x54,0x2a,0x20,0x6f,0x66,0x66,0x73,0x65,0x74,0x5f,0x4f,0x2c,0x20,0x5f,0x5f,0x67,0x6c,0x6f,0x62,0x61,0x6c,0x20,0x46,0x4c,0x4f,0x41,0x54,0x2a,0x20,0x6f,0x66,0x66,0x73,0x65,0x74,0x5f,0x41,0x2c,0x20,0x5f,0x5f,0x67,0x6c,0x6f,0x62,0x61,0x6c,0x20,0x46,0x4c,0x4f,0x41,0x54,0x2a,0x20,0x6f,0x66,0x66,0x73,0x65,0x74,0x5f,0x42,0x2c,0xa,0x23,0x69,0x66,0x64,0x65,0x66,0x20,0x42,0x49,0x41,0x53,0xa,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x5f,0x5f,0x67,0x6c,0x6f,0x62,0x61,0x6c,0x20,0x46,0x4c,0x4f,0x41,0x54,0x2a,0x20,0x6f,0x66,0x66,0x73,0x65,0x74,0x5f,0x43,0x2c,0xa,0x23,0x65,0x6e,0x64,0x69,0x66,0xa,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x5f,0x5f,0x70,0x72,0x69,0x76,0x61,0x74,0x65,0x20,0x63,0x6f,0x6e,0x73,0x74,0x20,0x69,0x6e,0x74,0x20,0x65,0x2c,0xa,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x5f,0x5f,0x70,0x72,0x69,0x76,0x61,0x74,0x65,0x20,0x63,0x6f,0x6e,0x73,0x74,0x20,0x69,0x6e,0x74,0x20,0x6c,0x2c,0xa,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x5f,0x5f,0x70,0x72,0x69,0x76,0x61,0x74,0x65,0x20,0x63,0x6f,0x6e,0x73,0x74,0x20,0x69,0x6e,0x74,0x20,0x68,0x2c,0xa,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x5f,0x5f,0x70,0x72,0x69,0x76,0x61,0x74,0x65,0x20,0x63,0x6f,0x6e,0x73,0x74,0x20,0x69,0x6e,0x74,0x34,0x20,0x6f,0x66,0x66,0x73,0x65,0x74,0x73,0x2c,0xa,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x5f,0x5f,0x70,0x72,0x69,0x76,0x61,0x74,0x65,0x20,0x63,0x6f,0x6e,0x73,0x74,0x20,0x69,0x6e,0x74,0x34,0x20,0x69,0x74,0x65,0x72,0x73,0x2c,0xa,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x5f,0x5f,0x70,0x72,0x69,0x76,0x61,0x74,0x65,0x20,0x63,0x6f,0x6e,0x73,0x74,0x20,0x69,0x6e,0x74,0x34,0x20,0x73,0x74,0x65,0x70,0x73,0x29,0x20,0x7b,0xa,0x20,0x20,0x20,0x20,0x69,0x6e,0x74,0x33,0x20,0x70,0x6f,0x73,0x20,0x3d,0x20,0x28,0x69,0x6e,0x74,0x33,0x29,0x28,0x67,0x65,0x74,0x5f,0x67,0x6c,0x6f,0x62,0x61,0x6c,0x5f,0x69,0x64,0x28,0x30,0x29,0x2c,0x20,0x67,0x65,0x74,0x5f,0x67,0x6c,0x6f,0x62,0x61,0x6c,0x5f,0x69,0x64,0x28,0x31,0x29,0x2c,0x20,0x67,0x65,0x74,0x5f,0x67,0x6c,0x6f,0x62,0x61,0x6c,0x5f,0x69,0x64,0x28,0x32,0x29,0x29,0x3b,0xa,0x20,0x20,0x20,0x20,0xa,0x20,0x20,0x20,0x20,0x69,0x66,0x20,0x28,0x70,0x6f,0x73,0x2e,0x78,0x20,0x3c,0x20,0x67,0x6c,0x6f,0x62,0x61,0x6c,0x5f,0x64,0x69,0x6d,0x30,0x20,0x26,0x26,0x20,0x70,0x6f,0x73,0x2e,0x79,0x20,0x3c,0x20,0x67,0x6c,0x6f,0x62,0x61,0x6c,0x5f,0x64,0x69,0x6d,0x31,0x20,0x26,0x26,0x20,0x70,0x6f,0x73,0x2e,0x7a,0x20,0x3c,0x20,0x67,0x6c,0x6f,0x62,0x61,0x6c,0x5f,0x64,0x69,0x6d,0x32,0x29,0x20,0x7b,0xa,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x69,0x6e,0x74,0x34,0x20,0x69,0x6e,0x64,0x65,0x78,0x20,0x3d,0x20,0x28,0x69,0x6e,0x74,0x34,0x29,0x28,0x70,0x6f,0x73,0x2e,0x7a,0x29,0x3b,0xa,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x69,0x66,0x20,0x28,0x69,0x74,0x65,0x72,0x73,0x2e,0x78,0x20,0x3e,0x3d,0x20,0x30,0x29,0x20,0x7b,0xa,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x69,0x6e,0x64,0x65,0x78,0x2e,0x78,0x20,0x3d,0x20,0x28,0x69,0x6e,0x74,0x29,0x28,0x6f,0x66,0x66,0x73,0x65,0x74,0x5f,0x4f,0x5b,0x70,0x6f,0x73,0x2e,0x7a,0x5d,0x29,0x3b,0xa,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x7d,0xa,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x69,0x66,0x20,0x28,0x69,0x74,0x65,0x72,0x73,0x2e,0x79,0x20,0x3e,0x3d,0x20,0x30,0x29,0x20,0x7b,0xa,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x69,0x6e,0x64,0x65,0x78,0x2e,0x79,0x20,0x3d,0x20,0x28,0x69,0x6e,0x74,0x29,0x28,0x6f,0x66,0x66,0x73,0x65,0x74,0x5f,0x41,0x5b,0x70,0x6f,0x73,0x2e,0x7a,0x5d,0x29,0x3b,0xa,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x7d,0xa,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x69,0x66,0x20,0x28,0x69,0x74,0x65,0x72,0x73,0x2e,0x7a,0x20,0x3e,0x3d,0x20,0x30,0x29,0x20,0x7b,0xa,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x69,0x6e,0x64,0x65,0x78,0x2e,0x7a,0x20,0x3d,0x20,0x28,0x69,0x6e,0x74,0x29,0x28,0x6f,0x66,0x66,0x73,0x65,0x74,0x5f,0x42,0x5b,0x70,0x6f,0x73,0x2e,0x7a,0x5d,0x29,0x3b,0xa,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x7d,0xa,0x23,0x69,0x66,0x64,0x65,0x66,0x20,0x42,0x49,0x41,0x53,0xa,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x69,0x66,0x20,0x28,0x69,0x74,0x65,0x72,0x73,0x2e,0x77,0x20,0x3e,0x3d,0x20,0x30,0x29,0x20,0x7b,0xa,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x69,0x6e,0x64,0x65,0x78,0x2e,0x77,0x20,0x3d,0x20,0x28,0x69,0x6e,0x74,0x29,0x28,0x6f,0x66,0x66,0x73,0x65,0x74,0x5f,0x43,0x5b,0x70,0x6f,0x73,0x2e,0x7a,0x5d,0x29,0x3b,0xa,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x7d,0xa,0x23,0x65,0x6e,0x64,0x69,0x66,0xa,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x69,0x6e,0x74,0x34,0x20,0x6f,0x66,0x66,0x73,0x65,0x74,0x20,0x3d,0x20,0x69,0x6e,0x64,0x65,0x78,0x20,0x2a,0x20,0x73,0x74,0x65,0x70,0x73,0x20,0x2b,0x20,0x6f,0x66,0x66,0x73,0x65,0x74,0x73,0x3b,0xa,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0xa,0x23,0x69,0x66,0x20,0x54,0x52,0x41,0x4e,0x53,0x50,0x4f,0x53,0x45,0x5f,0x41,0xa,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x5f,0x5f,0x67,0x6c,0x6f,0x62,0x61,0x6c,0x20,0x46,0x4c,0x4f,0x41,0x54,0x2a,0x20,0x41,0x5f,0x70,0x74,0x72,0x20,0x3d,0x20,0x69,0x6e,0x70,0x75,0x74,0x5f,0x41,0x20,0x2b,0x20,0x6f,0x66,0x66,0x73,0x65,0x74,0x2e,0x79,0x20,0x2b,0x20,0x70,0x6f,0x73,0x2e,0x79,0x3b,0xa,0x23,0x65,0x6c,0x73,0x65,0xa,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x5f,0x5f,0x67,0x6c,0x6f,0x62,0x61,0x6c,0x20,0x46,0x4c,0x4f,0x41,0x54,0x2a,0x20,0x41,0x5f,0x70,0x74,0x72,0x20,0x3d,0x20,0x69,0x6e,0x70,0x75,0x74,0x5f,0x41,0x20,0x2b,0x20,0x6f,0x66,0x66,0x73,0x65,0x74,0x2e,0x79,0x20,0x2b,0x20,0x70,0x6f,0x73,0x2e,0x79,0x20,0x2a,0x20,0x6c,0x3b,0xa,0x23,0x65,0x6e,0x64,0x69,0x66,0xa,0xa,0x23,0x69,0x66,0x20,0x54,0x52,0x41,0x4e,0x53,0x50,0x4f,0x53,0x45,0x5f,0x42,0xa,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x5f,0x5f,0x67,0x6c,0x6f,0x62,0x61,0x6c,0x20,0x46,0x4c,0x4f,0x41,0x54,0x2a,0x20,0x42,0x5f,0x70,0x74,0x72,0x20,0x3d,0x20,0x69,0x6e,0x70,0x75,0x74,0x5f,0x42,0x20,0x2b,0x20,0x6f,0x66,0x66,0x73,0x65,0x74,0x2e,0x7a,0x20,0x2b,0x20,0x70,0x6f,0x73,0x2e,0x78,0x20,0x2a,0x20,0x6c,0x3b,0xa,0x23,0x65,0x6c,0x73,0x65,0xa,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x5f,0x5f,0x67,0x6c,0x6f,0x62,0x61,0x6c,0x20,0x46,0x4c,0x4f,0x41,0x54,0x2a,0x20,0x42,0x5f,0x70,0x74,0x72,0x20,0x3d,0x20,0x69,0x6e,0x70,0x75,0x74,0x5f,0x42,0x20,0x2b,0x20,0x6f,0x66,0x66,0x73,0x65,0x74,0x2e,0x7a,0x20,0x2b,0x20,0x70,0x6f,0x73,0x2e,0x78,0x3b,0xa,0x23,0x65,0x6e,0x64,0x69,0x66,0xa,0xa,0x23,0x69,0x66,0x64,0x65,0x66,0x20,0x42,0x49,0x41,0x53,0xa,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x46,0x4c,0x4f,0x41,0x54,0x20,0x76,0x61,0x6c,0x75,0x65,0x20,0x3d,0x20,0x69,0x6e,0x70,0x75,0x74,0x5f,0x43,0x5b,0x6f,0x66,0x66,0x73,0x65,0x74,0x2e,0x77,0x20,0x2b,0x20,0x70,0x6f,0x73,0x2e,0x78,0x5d,0x3b,0xa,0x23,0x65,0x6c,0x73,0x65,0xa,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x46,0x4c,0x4f,0x41,0x54,0x20,0x76,0x61,0x6c,0x75,0x65,0x20,0x3d,0x20,0x30,0x3b,0xa,0x23,0x65,0x6e,0x64,0x69,0x66,0xa,0xa,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x66,0x6f,0x72,0x28,0x69,0x6e,0x74,0x20,0x69,0x20,0x3d,0x20,0x30,0x3b,0x20,0x69,0x20,0x3c,0x20,0x6c,0x3b,0x20,0x2b,0x2b,0x69,0x29,0x7b,0xa,0x23,0x69,0x66,0x20,0x54,0x52,0x41,0x4e,0x53,0x50,0x4f,0x53,0x45,0x5f,0x41,0xa,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x46,0x4c,0x4f,0x41,0x54,0x20,0x76,0x61,0x6c,0x75,0x65,0x5f,0x61,0x20,0x3d,0x20,0x41,0x5f,0x70,0x74,0x72,0x5b,0x69,0x20,0x2a,0x20,0x65,0x5d,0x3b,0xa,0x23,0x65,0x6c,0x73,0x65,0xa,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x46,0x4c,0x4f,0x41,0x54,0x20,0x76,0x61,0x6c,0x75,0x65,0x5f,0x61,0x20,0x3d,0x20,0x41,0x5f,0x70,0x74,0x72,0x5b,0x69,0x5d,0x3b,0xa,0x23,0x65,0x6e,0x64,0x69,0x66,0xa,0xa,0x23,0x69,0x66,0x20,0x54,0x52,0x41,0x4e,0x53,0x50,0x4f,0x53,0x45,0x5f,0x42,0xa,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x46,0x4c,0x4f,0x41,0x54,0x20,0x76,0x61,0x6c,0x75,0x65,0x5f,0x62,0x20,0x3d,0x20,0x42,0x5f,0x70,0x74,0x72,0x5b,0x69,0x5d,0x3b,0xa,0x23,0x65,0x6c,0x73,0x65,0xa,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x46,0x4c,0x4f,0x41,0x54,0x20,0x76,0x61,0x6c,0x75,0x65,0x5f,0x62,0x20,0x3d,0x20,0x42,0x5f,0x70,0x74,0x72,0x5b,0x69,0x20,0x2a,0x20,0x68,0x5d,0x3b,0xa,0x23,0x65,0x6e,0x64,0x69,0x66,0xa,0xa,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x76,0x61,0x6c,0x75,0x65,0x20,0x3d,0x20,0x6d,0x61,0x64,0x28,0x76,0x61,0x6c,0x75,0x65,0x5f,0x61,0x2c,0x20,0x76,0x61,0x6c,0x75,0x65,0x5f,0x62,0x2c,0x20,0x76,0x61,0x6c,0x75,0x65,0x29,0x3b,0xa,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x7d,0xa,0xa,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x6f,0x75,0x74,0x70,0x75,0x74,0x5b,0x6f,0x66,0x66,0x73,0x65,0x74,0x2e,0x78,0x20,0x2b,0x20,0x70,0x6f,0x73,0x2e,0x79,0x20,0x2a,0x20,0x68,0x20,0x2b,0x20,0x70,0x6f,0x73,0x2e,0x78,0x5d,0x20,0x3d,0x20,0x76,0x61,0x6c,0x75,0x65,0x3b,0xa,0x20,0x20,0x20,0x20,0x7d,0xa,0x7d,0xa,0xa,0x5f,0x5f,0x6b,0x65,0x72,0x6e,0x65,0x6c,0x20,0x76,0x6f,0x69,0x64,0x20,0x74,0x69,0x6c,0x65,0x28,0x5f,0x5f,0x70,0x72,0x69,0x76,0x61,0x74,0x65,0x20,0x69,0x6e,0x74,0x20,0x67,0x6c,0x6f,0x62,0x61,0x6c,0x5f,0x64,0x69,0x6d,0x30,0x2c,0x20,0x5f,0x5f,0x70,0x72,0x69,0x76,0x61,0x74,0x65,0x20,0x69,0x6e,0x74,0x20,0x67,0x6c,0x6f,0x62,0x61,0x6c,0x5f,0x64,0x69,0x6d,0x31,0x2c,0x20,0x5f,0x5f,0x70,0x72,0x69,0x76,0x61,0x74,0x65,0x20,0x69,0x6e,0x74,0x20,0x67,0x6c,0x6f,0x62,0x61,0x6c,0x5f,0x64,0x69,0x6d,0x32,0x2c,0xa,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x5f,0x5f,0x72,0x65,0x61,0x64,0x5f,0x6f,0x6e,0x6c,0x79,0x20,0x69,0x6d,0x61,0x67,0x65,0x32,0x64,0x5f,0x74,0x20,0x69,0x6e,0x70,0x75,0x74,0x2c,0xa,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x5f,0x5f,0x67,0x6c,0x6f,0x62,0x61,0x6c,0x20,0x46,0x4c,0x4f,0x41,0x54,0x2a,0x20,0x6f,0x75,0x74,0x70,0x75,0x74,0x2c,0xa,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x5f,0x5f,0x70,0x72,0x69,0x76,0x61,0x74,0x65,0x20,0x63,0x6f,0x6e,0x73,0x74,0x20,0x69,0x6e,0x74,0x20,0x77,0x69,0x64,0x74,0x68,0x2c,0xa,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x5f,0x5f,0x70,0x72,0x69,0x76,0x61,0x74,0x65,0x20,0x63,0x6f,0x6e,0x73,0x74,0x20,0x69,0x6e,0x74,0x20,0x68,0x65,0x69,0x67,0x68,0x74,0x2c,0xa,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x5f,0x5f,0x70,0x72,0x69,0x76,0x61,0x74,0x65,0x20,0x63,0x6f,0x6e,0x73,0x74,0x20,0x69,0x6e,0x74,0x20,0x63,0x68,0x61,0x6e,0x6e,0x65,0x6c,0x29,0x7b,0xa,0x20,0x20,0x20,0x20,0x69,0x6e,0x74,0x33,0x20,0x70,0x6f,0x73,0x20,0x3d,0x20,0x28,0x69,0x6e,0x74,0x33,0x29,0x28,0x67,0x65,0x74,0x5f,0x67,0x6c,0x6f,0x62,0x61,0x6c,0x5f,0x69,0x64,0x28,0x30,0x29,0x2c,0x20,0x67,0x65,0x74,0x5f,0x67,0x6c,0x6f,0x62,0x61,0x6c,0x5f,0x69,0x64,0x28,0x31,0x29,0x2c,0x20,0x67,0x65,0x74,0x5f,0x67,0x6c,0x6f,0x62,0x61,0x6c,0x5f,0x69,0x64,0x28,0x32,0x29,0x29,0x3b,0xa,0x20,0x20,0x20,0x20,0x69,0x66,0x20,0x28,0x70,0x6f,0x73,0x2e,0x78,0x20,0x3c,0x20,0x67,0x6c,0x6f,0x62,0x61,0x6c,0x5f,0x64,0x69,0x6d,0x30,0x20,0x26,0x26,0x20,0x70,0x6f,0x73,0x2e,0x79,0x20,0x3c,0x20,0x67,0x6c,0x6f,0x62,0x61,0x6c,0x5f,0x64,0x69,0x6d,0x31,0x20,0x26,0x26,0x20,0x70,0x6f,0x73,0x2e,0x7a,0x20,0x3c,0x20,0x67,0x6c,0x6f,0x62,0x61,0x6c,0x5f,0x64,0x69,0x6d,0x32,0x29,0x20,0x7b,0xa,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x63,0x6f,0x6e,0x73,0x74,0x20,0x69,0x6e,0x74,0x20,0x77,0x20,0x3d,0x20,0x70,0x6f,0x73,0x2e,0x78,0x20,0x25,0x20,0x77,0x69,0x64,0x74,0x68,0x3b,0xa,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x63,0x6f,0x6e,0x73,0x74,0x20,0x69,0x6e,0x74,0x20,0x68,0x20,0x3d,0x20,0x70,0x6f,0x73,0x2e,0x78,0x20,0x2f,0x20,0x77,0x69,0x64,0x74,0x68,0x3b,0xa,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x63,0x6f,0x6e,0x73,0x74,0x20,0x69,0x6e,0x74,0x20,0x63,0x20,0x3d,0x20,0x70,0x6f,0x73,0x2e,0x79,0x20,0x3c,0x3c,0x20,0x32,0x3b,0xa,0xa,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x63,0x6f,0x6e,0x73,0x74,0x20,0x69,0x6e,0x74,0x20,0x78,0x5f,0x64,0x73,0x74,0x5f,0x70,0x69,0x74,0x63,0x68,0x20,0x3d,0x20,0x31,0x3b,0xa,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x63,0x6f,0x6e,0x73,0x74,0x20,0x69,0x6e,0x74,0x20,0x79,0x5f,0x64,0x73,0x74,0x5f,0x70,0x69,0x74,0x63,0x68,0x20,0x3d,0x20,0x78,0x5f,0x64,0x73,0x74,0x5f,0x70,0x69,0x74,0x63,0x68,0x20,0x2a,0x20,0x77,0x69,0x64,0x74,0x68,0x3b,0xa,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x63,0x6f,0x6e,0x73,0x74,0x20,0x69,0x6e,0x74,0x20,0x63,0x5f,0x64,0x73,0x74,0x5f,0x70,0x69,0x74,0x63,0x68,0x20,0x3d,0x20,0x79,0x5f,0x64,0x73,0x74,0x5f,0x70,0x69,0x74,0x63,0x68,0x20,0x2a,0x20,0x68,0x65,0x69,0x67,0x68,0x74,0x3b,0xa,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x63,0x6f,0x6e,0x73,0x74,0x20,0x69,0x6e,0x74,0x20,0x62,0x5f,0x64,0x73,0x74,0x5f,0x70,0x69,0x74,0x63,0x68,0x20,0x3d,0x20,0x63,0x5f,0x64,0x73,0x74,0x5f,0x70,0x69,0x74,0x63,0x68,0x20,0x2a,0x20,0x63,0x68,0x61,0x6e,0x6e,0x65,0x6c,0x3b,0xa,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x5f,0x5f,0x67,0x6c,0x6f,0x62,0x61,0x6c,0x20,0x46,0x4c,0x4f,0x41,0x54,0x2a,0x20,0x64,0x73,0x74,0x5f,0x70,0x74,0x72,0x20,0x3d,0x20,0x6f,0x75,0x74,0x70,0x75,0x74,0x20,0x2b,0x20,0x70,0x6f,0x73,0x2e,0x7a,0x20,0x2a,0x20,0x62,0x5f,0x64,0x73,0x74,0x5f,0x70,0x69,0x74,0x63,0x68,0x20,0x2b,0x20,0x63,0x20,0x2a,0x20,0x63,0x5f,0x64,0x73,0x74,0x5f,0x70,0x69,0x74,0x63,0x68,0x20,0x2b,0x20,0x68,0x20,0x2a,0x20,0x79,0x5f,0x64,0x73,0x74,0x5f,0x70,0x69,0x74,0x63,0x68,0x20,0x2b,0x20,0x77,0x20,0x2a,0x20,0x78,0x5f,0x64,0x73,0x74,0x5f,0x70,0x69,0x74,0x63,0x68,0x3b,0xa,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0xa,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x46,0x4c,0x4f,0x41,0x54,0x34,0x20,0x76,0x61,0x6c,0x75,0x65,0x20,0x3d,0x20,0x52,0x49,0x5f,0x46,0x28,0x69,0x6e,0x70,0x75,0x74,0x2c,0x20,0x53,0x41,0x4d,0x50,0x4c,0x45,0x52,0x2c,0x20,0x28,0x69,0x6e,0x74,0x32,0x29,0x28,0x70,0x6f,0x73,0x2e,0x79,0x20,0x2a,0x20,0x77,0x69,0x64,0x74,0x68,0x20,0x2b,0x20,0x77,0x2c,0x20,0x70,0x6f,0x73,0x2e,0x7a,0x20,0x2a,0x20,0x68,0x65,0x69,0x67,0x68,0x74,0x20,0x2b,0x20,0x68,0x29,0x29,0x3b,0xa,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x64,0x73,0x74,0x5f,0x70,0x74,0x72,0x5b,0x30,0x5d,0x20,0x3d,0x20,0x76,0x61,0x6c,0x75,0x65,0x2e,0x78,0x3b,0xa,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x69,0x66,0x28,0x63,0x20,0x2b,0x20,0x31,0x20,0x3e,0x3d,0x20,0x63,0x68,0x61,0x6e,0x6e,0x65,0x6c,0x29,0x72,0x65,0x74,0x75,0x72,0x6e,0x3b,0xa,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x64,0x73,0x74,0x5f,0x70,0x74,0x72,0x5b,0x63,0x5f,0x64,0x73,0x74,0x5f,0x70,0x69,0x74,0x63,0x68,0x5d,0x20,0x3d,0x20,0x76,0x61,0x6c,0x75,0x65,0x2e,0x79,0x3b,0xa,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x69,0x66,0x28,0x63,0x20,0x2b,0x20,0x32,0x20,0x3e,0x3d,0x20,0x63,0x68,0x61,0x6e,0x6e,0x65,0x6c,0x29,0x72,0x65,0x74,0x75,0x72,0x6e,0x3b,0xa,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x64,0x73,0x74,0x5f,0x70,0x74,0x72,0x5b,0x32,0x20,0x2a,0x20,0x63,0x5f,0x64,0x73,0x74,0x5f,0x70,0x69,0x74,0x63,0x68,0x5d,0x20,0x3d,0x20,0x76,0x61,0x6c,0x75,0x65,0x2e,0x7a,0x3b,0xa,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x69,0x66,0x28,0x63,0x20,0x2b,0x20,0x33,0x20,0x3e,0x3d,0x20,0x63,0x68,0x61,0x6e,0x6e,0x65,0x6c,0x29,0x72,0x65,0x74,0x75,0x72,0x6e,0x3b,0xa,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x64,0x73,0x74,0x5f,0x70,0x74,0x72,0x5b,0x33,0x20,0x2a,0x20,0x63,0x5f,0x64,0x73,0x74,0x5f,0x70,0x69,0x74,0x63,0x68,0x5d,0x20,0x3d,0x20,0x76,0x61,0x6c,0x75,0x65,0x2e,0x77,0x3b,0xa,0x20,0x20,0x20,0x20,0x7d,0xa,0x7d,0xa,0xa,0x5f,0x5f,0x6b,0x65,0x72,0x6e,0x65,0x6c,0x20,0x76,0x6f,0x69,0x64,0x20,0x70,0x61,0x63,0x6b,0x28,0x5f,0x5f,0x70,0x72,0x69,0x76,0x61,0x74,0x65,0x20,0x69,0x6e,0x74,0x20,0x67,0x6c,0x6f,0x62,0x61,0x6c,0x5f,0x64,0x69,0x6d,0x30,0x2c,0x20,0x5f,0x5f,0x70,0x72,0x69,0x76,0x61,0x74,0x65,0x20,0x69,0x6e,0x74,0x20,0x67,0x6c,0x6f,0x62,0x61,0x6c,0x5f,0x64,0x69,0x6d,0x31,0x2c,0x20,0x5f,0x5f,0x70,0x72,0x69,0x76,0x61,0x74,0x65,0x20,0x69,0x6e,0x74,0x20,0x67,0x6c,0x6f,0x62,0x61,0x6c,0x5f,0x64,0x69,0x6d,0x32,0x2c,0xa,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x5f,0x5f,0x67,0x6c,0x6f,0x62,0x61,0x6c,0x20,0x46,0x4c,0x4f,0x41,0x54,0x2a,0x20,0x69,0x6e,0x70,0x75,0x74,0x2c,0xa,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x5f,0x5f,0x77,0x72,0x69,0x74,0x65,0x5f,0x6f,0x6e,0x6c,0x79,0x20,0x69,0x6d,0x61,0x67,0x65,0x32,0x64,0x5f,0x74,0x20,0x6f,0x75,0x74,0x70,0x75,0x74,0x2c,0xa,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x5f,0x5f,0x70,0x72,0x69,0x76,0x61,0x74,0x65,0x20,0x63,0x6f,0x6e,0x73,0x74,0x20,0x69,0x6e,0x74,0x20,0x77,0x69,0x64,0x74,0x68,0x2c,0xa,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x5f,0x5f,0x70,0x72,0x69,0x76,0x61,0x74,0x65,0x20,0x63,0x6f,0x6e,0x73,0x74,0x20,0x69,0x6e,0x74,0x20,0x68,0x65,0x69,0x67,0x68,0x74,0x2c,0xa,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x5f,0x5f,0x70,0x72,0x69,0x76,0x61,0x74,0x65,0x20,0x63,0x6f,0x6e,0x73,0x74,0x20,0x69,0x6e,0x74,0x20,0x63,0x68,0x61,0x6e,0x6e,0x65,0x6c,0x29,0x7b,0xa,0x20,0x20,0x20,0x20,0x69,0x6e,0x74,0x33,0x20,0x70,0x6f,0x73,0x20,0x3d,0x20,0x28,0x69,0x6e,0x74,0x33,0x29,0x28,0x67,0x65,0x74,0x5f,0x67,0x6c,0x6f,0x62,0x61,0x6c,0x5f,0x69,0x64,0x28,0x30,0x29,0x2c,0x20,0x67,0x65,0x74,0x5f,0x67,0x6c,0x6f,0x62,0x61,0x6c,0x5f,0x69,0x64,0x28,0x31,0x29,0x2c,0x20,0x67,0x65,0x74,0x5f,0x67,0x6c,0x6f,0x62,0x61,0x6c,0x5f,0x69,0x64,0x28,0x32,0x29,0x29,0x3b,0xa,0x20,0x20,0x20,0x20,0x69,0x66,0x20,0x28,0x70,0x6f,0x73,0x2e,0x78,0x20,0x3c,0x20,0x67,0x6c,0x6f,0x62,0x61,0x6c,0x5f,0x64,0x69,0x6d,0x30,0x20,0x26,0x26,0x20,0x70,0x6f,0x73,0x2e,0x79,0x20,0x3c,0x20,0x67,0x6c,0x6f,0x62,0x61,0x6c,0x5f,0x64,0x69,0x6d,0x31,0x20,0x26,0x26,0x20,0x70,0x6f,0x73,0x2e,0x7a,0x20,0x3c,0x20,0x67,0x6c,0x6f,0x62,0x61,0x6c,0x5f,0x64,0x69,0x6d,0x32,0x29,0x20,0x7b,0xa,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x63,0x6f,0x6e,0x73,0x74,0x20,0x69,0x6e,0x74,0x20,0x77,0x20,0x3d,0x20,0x70,0x6f,0x73,0x2e,0x78,0x20,0x25,0x20,0x77,0x69,0x64,0x74,0x68,0x3b,0xa,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x63,0x6f,0x6e,0x73,0x74,0x20,0x69,0x6e,0x74,0x20,0x68,0x20,0x3d,0x20,0x70,0x6f,0x73,0x2e,0x78,0x20,0x2f,0x20,0x77,0x69,0x64,0x74,0x68,0x3b,0xa,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x63,0x6f,0x6e,0x73,0x74,0x20,0x69,0x6e,0x74,0x20,0x63,0x20,0x3d,0x20,0x70,0x6f,0x73,0x2e,0x79,0x20,0x3c,0x3c,0x20,0x32,0x3b,0xa,0xa,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x63,0x6f,0x6e,0x73,0x74,0x20,0x69,0x6e,0x74,0x20,0x78,0x5f,0x73,0x72,0x63,0x5f,0x70,0x69,0x74,0x63,0x68,0x20,0x3d,0x20,0x31,0x3b,0xa,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x63,0x6f,0x6e,0x73,0x74,0x20,0x69,0x6e,0x74,0x20,0x79,0x5f,0x73,0x72,0x63,0x5f,0x70,0x69,0x74,0x63,0x68,0x20,0x3d,0x20,0x78,0x5f,0x73,0x72,0x63,0x5f,0x70,0x69,0x74,0x63,0x68,0x20,0x2a,0x20,0x77,0x69,0x64,0x74,0x68,0x3b,0xa,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x63,0x6f,0x6e,0x73,0x74,0x20,0x69,0x6e,0x74,0x20,0x63,0x5f,0x73,0x72,0x63,0x5f,0x70,0x69,0x74,0x63,0x68,0x20,0x3d,0x20,0x79,0x5f,0x73,0x72,0x63,0x5f,0x70,0x69,0x74,0x63,0x68,0x20,0x2a,0x20,0x68,0x65,0x69,0x67,0x68,0x74,0x3b,0xa,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x63,0x6f,0x6e,0x73,0x74,0x20,0x69,0x6e,0x74,0x20,0x62,0x5f,0x73,0x72,0x63,0x5f,0x70,0x69,0x74,0x63,0x68,0x20,0x3d,0x20,0x63,0x5f,0x73,0x72,0x63,0x5f,0x70,0x69,0x74,0x63,0x68,0x20,0x2a,0x20,0x63,0x68,0x61,0x6e,0x6e,0x65,0x6c,0x3b,0xa,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x5f,0x5f,0x67,0x6c,0x6f,0x62,0x61,0x6c,0x20,0x46,0x4c,0x4f,0x41,0x54,0x2a,0x20,0x73,0x72,0x63,0x5f,0x70,0x74,0x72,0x20,0x3d,0x20,0x69,0x6e,0x70,0x75,0x74,0x20,0x2b,0x20,0x70,0x6f,0x73,0x2e,0x7a,0x20,0x2a,0x20,0x62,0x5f,0x73,0x72,0x63,0x5f,0x70,0x69,0x74,0x63,0x68,0x20,0x2b,0x20,0x63,0x20,0x2a,0x20,0x63,0x5f,0x73,0x72,0x63,0x5f,0x70,0x69,0x74,0x63,0x68,0x20,0x2b,0x20,0x68,0x20,0x2a,0x20,0x79,0x5f,0x73,0x72,0x63,0x5f,0x70,0x69,0x74,0x63,0x68,0x20,0x2b,0x20,0x77,0x20,0x2a,0x20,0x78,0x5f,0x73,0x72,0x63,0x5f,0x70,0x69,0x74,0x63,0x68,0x3b,0xa,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x46,0x4c,0x4f,0x41,0x54,0x34,0x20,0x76,0x61,0x6c,0x75,0x65,0x20,0x3d,0x20,0x28,0x46,0x4c,0x4f,0x41,0x54,0x34,0x29,0x30,0x3b,0xa,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x46,0x4c,0x4f,0x41,0x54,0x20,0x2a,0x76,0x61,0x6c,0x75,0x65,0x5f,0x70,0x74,0x72,0x20,0x3d,0x20,0x28,0x46,0x4c,0x4f,0x41,0x54,0x2a,0x29,0x26,0x76,0x61,0x6c,0x75,0x65,0x3b,0xa,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x66,0x6f,0x72,0x28,0x69,0x6e,0x74,0x20,0x69,0x20,0x3d,0x20,0x30,0x3b,0x20,0x69,0x20,0x3c,0x20,0x34,0x20,0x26,0x26,0x20,0x28,0x69,0x20,0x2b,0x20,0x63,0x20,0x3c,0x20,0x63,0x68,0x61,0x6e,0x6e,0x65,0x6c,0x29,0x3b,0x20,0x2b,0x2b,0x69,0x29,0x7b,0xa,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x76,0x61,0x6c,0x75,0x65,0x5f,0x70,0x74,0x72,0x5b,0x69,0x5d,0x20,0x3d,0x20,0x73,0x72,0x63,0x5f,0x70,0x74,0x72,0x5b,0x69,0x20,0x2a,0x20,0x63,0x5f,0x73,0x72,0x63,0x5f,0x70,0x69,0x74,0x63,0x68,0x5d,0x3b,0xa,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x7d,0xa,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x57,0x49,0x5f,0x46,0x28,0x6f,0x75,0x74,0x70,0x75,0x74,0x2c,0x20,0x28,0x69,0x6e,0x74,0x32,0x29,0x28,0x70,0x6f,0x73,0x2e,0x79,0x20,0x2a,0x20,0x77,0x69,0x64,0x74,0x68,0x20,0x2b,0x20,0x77,0x2c,0x20,0x70,0x6f,0x73,0x2e,0x7a,0x20,0x2a,0x20,0x68,0x65,0x69,0x67,0x68,0x74,0x20,0x2b,0x20,0x68,0x29,0x2c,0x20,0x76,0x61,0x6c,0x75,0x65,0x29,0x3b,0xa,0x20,0x20,0x20,0x20,0x7d,0xa,0x7d,0xa,0xa,0x5f,0x5f,0x6b,0x65,0x72,0x6e,0x65,0x6c,0x20,0x76,0x6f,0x69,0x64,0x20,0x62,0x61,0x74,0x63,0x68,0x5f,0x67,0x61,0x74,0x68,0x65,0x72,0x28,0x5f,0x5f,0x70,0x72,0x69,0x76,0x61,0x74,0x65,0x20,0x69,0x6e,0x74,0x20,0x67,0x6c,0x6f,0x62,0x61,0x6c,0x5f,0x64,0x69,0x6d,0x30,0x2c,0x20,0x5f,0x5f,0x70,0x72,0x69,0x76,0x61,0x74,0x65,0x20,0x69,0x6e,0x74,0x20,0x67,0x6c,0x6f,0x62,0x61,0x6c,0x5f,0x64,0x69,0x6d,0x31,0x2c,0x20,0x5f,0x5f,0x70,0x72,0x69,0x76,0x61,0x74,0x65,0x20,0x69,0x6e,0x74,0x20,0x67,0x6c,0x6f,0x62,0x61,0x6c,0x5f,0x64,0x69,0x6d,0x32,0x2c,0xa,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x5f,0x5f,0x67,0x6c,0x6f,0x62,0x61,0x6c,0x20,0x46,0x4c,0x4f,0x41,0x54,0x2a,0x20,0x6f,0x75,0x74,0x70,0x75,0x74,0x2c,0x20,0x5f,0x5f,0x67,0x6c,0x6f,0x62,0x61,0x6c,0x20,0x46,0x4c,0x4f,0x41,0x54,0x2a,0x20,0x69,0x6e,0x70,0x75,0x74,0x2c,0xa,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x5f,0x5f,0x67,0x6c,0x6f,0x62,0x61,0x6c,0x20,0x46,0x4c,0x4f,0x41,0x54,0x2a,0x20,0x6f,0x66,0x66,0x73,0x65,0x74,0x5f,0x64,0x73,0x74,0x2c,0x20,0x5f,0x5f,0x67,0x6c,0x6f,0x62,0x61,0x6c,0x20,0x46,0x4c,0x4f,0x41,0x54,0x2a,0x20,0x6f,0x66,0x66,0x73,0x65,0x74,0x5f,0x73,0x72,0x63,0x2c,0xa,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x5f,0x5f,0x70,0x72,0x69,0x76,0x61,0x74,0x65,0x20,0x63,0x6f,0x6e,0x73,0x74,0x20,0x69,0x6e,0x74,0x20,0x78,0x5f,0x73,0x69,0x7a,0x65,0x2c,0xa,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x5f,0x5f,0x70,0x72,0x69,0x76,0x61,0x74,0x65,0x20,0x63,0x6f,0x6e,0x73,0x74,0x20,0x69,0x6e,0x74,0x34,0x20,0x73,0x74,0x72,0x69,0x64,0x65,0x5f,0x73,0x72,0x63,0x2c,0xa,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x5f,0x5f,0x70,0x72,0x69,0x76,0x61,0x74,0x65,0x20,0x63,0x6f,0x6e,0x73,0x74,0x20,0x69,0x6e,0x74,0x34,0x20,0x73,0x74,0x72,0x69,0x64,0x65,0x5f,0x64,0x73,0x74,0x2c,0xa,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x5f,0x5f,0x70,0x72,0x69,0x76,0x61,0x74,0x65,0x20,0x63,0x6f,0x6e,0x73,0x74,0x20,0x69,0x6e,0x74,0x32,0x20,0x73,0x74,0x65,0x70,0x73,0x2c,0xa,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x5f,0x5f,0x70,0x72,0x69,0x76,0x61,0x74,0x65,0x20,0x63,0x6f,0x6e,0x73,0x74,0x20,0x69,0x6e,0x74,0x32,0x20,0x69,0x74,0x65,0x72,0x73,0x29,0x20,0x7b,0xa,0x20,0x20,0x20,0x20,0x69,0x6e,0x74,0x33,0x20,0x70,0x6f,0x73,0x20,0x3d,0x20,0x28,0x69,0x6e,0x74,0x33,0x29,0x28,0x67,0x65,0x74,0x5f,0x67,0x6c,0x6f,0x62,0x61,0x6c,0x5f,0x69,0x64,0x28,0x30,0x29,0x2c,0x20,0x67,0x65,0x74,0x5f,0x67,0x6c,0x6f,0x62,0x61,0x6c,0x5f,0x69,0x64,0x28,0x31,0x29,0x2c,0x20,0x67,0x65,0x74,0x5f,0x67,0x6c,0x6f,0x62,0x61,0x6c,0x5f,0x69,0x64,0x28,0x32,0x29,0x29,0x3b,0xa,0x20,0x20,0x20,0x20,0xa,0x20,0x20,0x20,0x20,0x69,0x66,0x20,0x28,0x70,0x6f,0x73,0x2e,0x78,0x20,0x3c,0x20,0x67,0x6c,0x6f,0x62,0x61,0x6c,0x5f,0x64,0x69,0x6d,0x30,0x20,0x26,0x26,0x20,0x70,0x6f,0x73,0x2e,0x79,0x20,0x3c,0x20,0x67,0x6c,0x6f,0x62,0x61,0x6c,0x5f,0x64,0x69,0x6d,0x31,0x20,0x26,0x26,0x20,0x70,0x6f,0x73,0x2e,0x7a,0x20,0x3c,0x20,0x67,0x6c,0x6f,0x62,0x61,0x6c,0x5f,0x64,0x69,0x6d,0x32,0x29,0x20,0x7b,0xa,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0xa,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x69,0x6e,0x74,0x20,0x78,0x20,0x3d,0x20,0x70,0x6f,0x73,0x2e,0x78,0x20,0x25,0x20,0x78,0x5f,0x73,0x69,0x7a,0x65,0x3b,0xa,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x69,0x6e,0x74,0x20,0x79,0x20,0x3d,0x20,0x70,0x6f,0x73,0x2e,0x78,0x20,0x2f,0x20,0x78,0x5f,0x73,0x69,0x7a,0x65,0x3b,0xa,0xa,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x69,0x6e,0x74,0x32,0x20,0x69,0x6e,0x64,0x65,0x78,0x20,0x3d,0x20,0x28,0x69,0x6e,0x74,0x32,0x29,0x28,0x70,0x6f,0x73,0x2e,0x7a,0x2c,0x20,0x70,0x6f,0x73,0x2e,0x7a,0x29,0x3b,0xa,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x69,0x66,0x20,0x28,0x69,0x74,0x65,0x72,0x73,0x2e,0x78,0x20,0x3e,0x3d,0x20,0x30,0x29,0x20,0x7b,0xa,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x69,0x6e,0x64,0x65,0x78,0x2e,0x78,0x20,0x3d,0x20,0x28,0x69,0x6e,0x74,0x29,0x28,0x6f,0x66,0x66,0x73,0x65,0x74,0x5f,0x64,0x73,0x74,0x5b,0x70,0x6f,0x73,0x2e,0x7a,0x5d,0x29,0x3b,0xa,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x7d,0xa,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x69,0x66,0x20,0x28,0x69,0x74,0x65,0x72,0x73,0x2e,0x79,0x20,0x3e,0x3d,0x20,0x30,0x29,0x20,0x7b,0xa,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x69,0x6e,0x64,0x65,0x78,0x2e,0x79,0x20,0x3d,0x20,0x28,0x69,0x6e,0x74,0x29,0x28,0x6f,0x66,0x66,0x73,0x65,0x74,0x5f,0x73,0x72,0x63,0x5b,0x70,0x6f,0x73,0x2e,0x7a,0x5d,0x29,0x3b,0xa,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x7d,0xa,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x69,0x6e,0x74,0x32,0x20,0x6f,0x66,0x66,0x73,0x65,0x74,0x20,0x3d,0x20,0x69,0x6e,0x64,0x65,0x78,0x20,0x2a,0x20,0x73,0x74,0x65,0x70,0x73,0x3b,0xa,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x6f,0x75,0x74,0x70,0x75,0x74,0x5b,0x6f,0x66,0x66,0x73,0x65,0x74,0x2e,0x78,0x20,0x2b,0x20,0x73,0x74,0x72,0x69,0x64,0x65,0x5f,0x64,0x73,0x74,0x2e,0x77,0x20,0x2b,0x20,0x78,0x20,0x2a,0x20,0x73,0x74,0x72,0x69,0x64,0x65,0x5f,0x64,0x73,0x74,0x2e,0x78,0x20,0x2b,0x20,0x79,0x20,0x2a,0x20,0x73,0x74,0x72,0x69,0x64,0x65,0x5f,0x64,0x73,0x74,0x2e,0x79,0x20,0x2b,0x20,0x70,0x6f,0x73,0x2e,0x79,0x20,0x2a,0x20,0x73,0x74,0x72,0x69,0x64,0x65,0x5f,0x64,0x73,0x74,0x2e,0x7a,0x5d,0x20,0x3d,0x20,0x69,0x6e,0x70,0x75,0x74,0x5b,0x6f,0x66,0x66,0x73,0x65,0x74,0x2e,0x79,0x20,0x2b,0x20,0x73,0x74,0x72,0x69,0x64,0x65,0x5f,0x73,0x72,0x63,0x2e,0x77,0x20,0x2b,0x20,0x78,0x20,0x2a,0x20,0x73,0x74,0x72,0x69,0x64,0x65,0x5f,0x73,0x72,0x63,0x2e,0x78,0x20,0x2b,0x20,0x79,0x20,0x2a,0x20,0x73,0x74,0x72,0x69,0x64,0x65,0x5f,0x73,0x72,0x63,0x2e,0x79,0x20,0x2b,0x20,0x70,0x6f,0x73,0x2e,0x79,0x20,0x2a,0x20,0x73,0x74,0x72,0x69,0x64,0x65,0x5f,0x73,0x72,0x63,0x2e,0x7a,0x5d,0x3b,0xa,0x20,0x20,0x20,0x20,0x7d,0xa,0x7d, } + }, +#ifndef MNN_OPENCL_BUFFER_CLOSED +{ + "loop_buf", + { 0x23,0x69,0x66,0x64,0x65,0x66,0x20,0x4d,0x4e,0x4e,0x5f,0x53,0x55,0x50,0x50,0x4f,0x52,0x54,0x5f,0x46,0x50,0x31,0x36,0xa,0x23,0x70,0x72,0x61,0x67,0x6d,0x61,0x20,0x4f,0x50,0x45,0x4e,0x43,0x4c,0x20,0x45,0x58,0x54,0x45,0x4e,0x53,0x49,0x4f,0x4e,0x20,0x63,0x6c,0x5f,0x6b,0x68,0x72,0x5f,0x66,0x70,0x31,0x36,0x20,0x3a,0x20,0x65,0x6e,0x61,0x62,0x6c,0x65,0xa,0x23,0x65,0x6e,0x64,0x69,0x66,0xa,0xa,0x5f,0x5f,0x6b,0x65,0x72,0x6e,0x65,0x6c,0x20,0x76,0x6f,0x69,0x64,0x20,0x62,0x61,0x74,0x63,0x68,0x5f,0x6d,0x61,0x74,0x6d,0x75,0x6c,0x5f,0x62,0x75,0x66,0x28,0x5f,0x5f,0x70,0x72,0x69,0x76,0x61,0x74,0x65,0x20,0x69,0x6e,0x74,0x20,0x67,0x6c,0x6f,0x62,0x61,0x6c,0x5f,0x64,0x69,0x6d,0x30,0x2c,0x20,0x5f,0x5f,0x70,0x72,0x69,0x76,0x61,0x74,0x65,0x20,0x69,0x6e,0x74,0x20,0x67,0x6c,0x6f,0x62,0x61,0x6c,0x5f,0x64,0x69,0x6d,0x31,0x2c,0x20,0x5f,0x5f,0x70,0x72,0x69,0x76,0x61,0x74,0x65,0x20,0x69,0x6e,0x74,0x20,0x67,0x6c,0x6f,0x62,0x61,0x6c,0x5f,0x64,0x69,0x6d,0x32,0x2c,0xa,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x5f,0x5f,0x67,0x6c,0x6f,0x62,0x61,0x6c,0x20,0x46,0x4c,0x4f,0x41,0x54,0x2a,0x20,0x6f,0x75,0x74,0x70,0x75,0x74,0x2c,0x20,0x5f,0x5f,0x67,0x6c,0x6f,0x62,0x61,0x6c,0x20,0x46,0x4c,0x4f,0x41,0x54,0x2a,0x20,0x69,0x6e,0x70,0x75,0x74,0x5f,0x41,0x2c,0x20,0x5f,0x5f,0x67,0x6c,0x6f,0x62,0x61,0x6c,0x20,0x46,0x4c,0x4f,0x41,0x54,0x2a,0x20,0x69,0x6e,0x70,0x75,0x74,0x5f,0x42,0x2c,0xa,0x23,0x69,0x66,0x64,0x65,0x66,0x20,0x42,0x49,0x41,0x53,0xa,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x5f,0x5f,0x67,0x6c,0x6f,0x62,0x61,0x6c,0x20,0x46,0x4c,0x4f,0x41,0x54,0x2a,0x20,0x69,0x6e,0x70,0x75,0x74,0x5f,0x43,0x2c,0xa,0x23,0x65,0x6e,0x64,0x69,0x66,0xa,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x5f,0x5f,0x67,0x6c,0x6f,0x62,0x61,0x6c,0x20,0x46,0x4c,0x4f,0x41,0x54,0x2a,0x20,0x6f,0x66,0x66,0x73,0x65,0x74,0x5f,0x4f,0x2c,0x20,0x5f,0x5f,0x67,0x6c,0x6f,0x62,0x61,0x6c,0x20,0x46,0x4c,0x4f,0x41,0x54,0x2a,0x20,0x6f,0x66,0x66,0x73,0x65,0x74,0x5f,0x41,0x2c,0x20,0x5f,0x5f,0x67,0x6c,0x6f,0x62,0x61,0x6c,0x20,0x46,0x4c,0x4f,0x41,0x54,0x2a,0x20,0x6f,0x66,0x66,0x73,0x65,0x74,0x5f,0x42,0x2c,0xa,0x23,0x69,0x66,0x64,0x65,0x66,0x20,0x42,0x49,0x41,0x53,0xa,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x5f,0x5f,0x67,0x6c,0x6f,0x62,0x61,0x6c,0x20,0x46,0x4c,0x4f,0x41,0x54,0x2a,0x20,0x6f,0x66,0x66,0x73,0x65,0x74,0x5f,0x43,0x2c,0xa,0x23,0x65,0x6e,0x64,0x69,0x66,0xa,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x5f,0x5f,0x70,0x72,0x69,0x76,0x61,0x74,0x65,0x20,0x63,0x6f,0x6e,0x73,0x74,0x20,0x69,0x6e,0x74,0x20,0x65,0x2c,0xa,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x5f,0x5f,0x70,0x72,0x69,0x76,0x61,0x74,0x65,0x20,0x63,0x6f,0x6e,0x73,0x74,0x20,0x69,0x6e,0x74,0x20,0x6c,0x2c,0xa,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x5f,0x5f,0x70,0x72,0x69,0x76,0x61,0x74,0x65,0x20,0x63,0x6f,0x6e,0x73,0x74,0x20,0x69,0x6e,0x74,0x20,0x68,0x2c,0xa,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x5f,0x5f,0x70,0x72,0x69,0x76,0x61,0x74,0x65,0x20,0x63,0x6f,0x6e,0x73,0x74,0x20,0x69,0x6e,0x74,0x34,0x20,0x6f,0x66,0x66,0x73,0x65,0x74,0x73,0x2c,0xa,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x5f,0x5f,0x70,0x72,0x69,0x76,0x61,0x74,0x65,0x20,0x63,0x6f,0x6e,0x73,0x74,0x20,0x69,0x6e,0x74,0x34,0x20,0x69,0x74,0x65,0x72,0x73,0x2c,0xa,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x5f,0x5f,0x70,0x72,0x69,0x76,0x61,0x74,0x65,0x20,0x63,0x6f,0x6e,0x73,0x74,0x20,0x69,0x6e,0x74,0x34,0x20,0x73,0x74,0x65,0x70,0x73,0x29,0x20,0x7b,0xa,0x20,0x20,0x20,0x20,0x69,0x6e,0x74,0x33,0x20,0x70,0x6f,0x73,0x20,0x3d,0x20,0x28,0x69,0x6e,0x74,0x33,0x29,0x28,0x67,0x65,0x74,0x5f,0x67,0x6c,0x6f,0x62,0x61,0x6c,0x5f,0x69,0x64,0x28,0x30,0x29,0x2c,0x20,0x67,0x65,0x74,0x5f,0x67,0x6c,0x6f,0x62,0x61,0x6c,0x5f,0x69,0x64,0x28,0x31,0x29,0x2c,0x20,0x67,0x65,0x74,0x5f,0x67,0x6c,0x6f,0x62,0x61,0x6c,0x5f,0x69,0x64,0x28,0x32,0x29,0x29,0x3b,0xa,0x20,0x20,0x20,0x20,0xa,0x20,0x20,0x20,0x20,0x69,0x66,0x20,0x28,0x70,0x6f,0x73,0x2e,0x78,0x20,0x3c,0x20,0x67,0x6c,0x6f,0x62,0x61,0x6c,0x5f,0x64,0x69,0x6d,0x30,0x20,0x26,0x26,0x20,0x70,0x6f,0x73,0x2e,0x79,0x20,0x3c,0x20,0x67,0x6c,0x6f,0x62,0x61,0x6c,0x5f,0x64,0x69,0x6d,0x31,0x20,0x26,0x26,0x20,0x70,0x6f,0x73,0x2e,0x7a,0x20,0x3c,0x20,0x67,0x6c,0x6f,0x62,0x61,0x6c,0x5f,0x64,0x69,0x6d,0x32,0x29,0x20,0x7b,0xa,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x69,0x6e,0x74,0x34,0x20,0x69,0x6e,0x64,0x65,0x78,0x20,0x3d,0x20,0x28,0x69,0x6e,0x74,0x34,0x29,0x28,0x70,0x6f,0x73,0x2e,0x7a,0x29,0x3b,0xa,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x69,0x66,0x20,0x28,0x69,0x74,0x65,0x72,0x73,0x2e,0x78,0x20,0x3e,0x3d,0x20,0x30,0x29,0x20,0x7b,0xa,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x69,0x6e,0x64,0x65,0x78,0x2e,0x78,0x20,0x3d,0x20,0x28,0x69,0x6e,0x74,0x29,0x28,0x6f,0x66,0x66,0x73,0x65,0x74,0x5f,0x4f,0x5b,0x70,0x6f,0x73,0x2e,0x7a,0x5d,0x29,0x3b,0xa,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x7d,0xa,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x69,0x66,0x20,0x28,0x69,0x74,0x65,0x72,0x73,0x2e,0x79,0x20,0x3e,0x3d,0x20,0x30,0x29,0x20,0x7b,0xa,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x69,0x6e,0x64,0x65,0x78,0x2e,0x79,0x20,0x3d,0x20,0x28,0x69,0x6e,0x74,0x29,0x28,0x6f,0x66,0x66,0x73,0x65,0x74,0x5f,0x41,0x5b,0x70,0x6f,0x73,0x2e,0x7a,0x5d,0x29,0x3b,0xa,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x7d,0xa,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x69,0x66,0x20,0x28,0x69,0x74,0x65,0x72,0x73,0x2e,0x7a,0x20,0x3e,0x3d,0x20,0x30,0x29,0x20,0x7b,0xa,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x69,0x6e,0x64,0x65,0x78,0x2e,0x7a,0x20,0x3d,0x20,0x28,0x69,0x6e,0x74,0x29,0x28,0x6f,0x66,0x66,0x73,0x65,0x74,0x5f,0x42,0x5b,0x70,0x6f,0x73,0x2e,0x7a,0x5d,0x29,0x3b,0xa,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x7d,0xa,0x23,0x69,0x66,0x64,0x65,0x66,0x20,0x42,0x49,0x41,0x53,0xa,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x69,0x66,0x20,0x28,0x69,0x74,0x65,0x72,0x73,0x2e,0x77,0x20,0x3e,0x3d,0x20,0x30,0x29,0x20,0x7b,0xa,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x69,0x6e,0x64,0x65,0x78,0x2e,0x77,0x20,0x3d,0x20,0x28,0x69,0x6e,0x74,0x29,0x28,0x6f,0x66,0x66,0x73,0x65,0x74,0x5f,0x43,0x5b,0x70,0x6f,0x73,0x2e,0x7a,0x5d,0x29,0x3b,0xa,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x7d,0xa,0x23,0x65,0x6e,0x64,0x69,0x66,0xa,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x69,0x6e,0x74,0x34,0x20,0x6f,0x66,0x66,0x73,0x65,0x74,0x20,0x3d,0x20,0x69,0x6e,0x64,0x65,0x78,0x20,0x2a,0x20,0x73,0x74,0x65,0x70,0x73,0x20,0x2b,0x20,0x6f,0x66,0x66,0x73,0x65,0x74,0x73,0x3b,0xa,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0xa,0x23,0x69,0x66,0x20,0x54,0x52,0x41,0x4e,0x53,0x50,0x4f,0x53,0x45,0x5f,0x41,0xa,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x5f,0x5f,0x67,0x6c,0x6f,0x62,0x61,0x6c,0x20,0x46,0x4c,0x4f,0x41,0x54,0x2a,0x20,0x41,0x5f,0x70,0x74,0x72,0x20,0x3d,0x20,0x69,0x6e,0x70,0x75,0x74,0x5f,0x41,0x20,0x2b,0x20,0x6f,0x66,0x66,0x73,0x65,0x74,0x2e,0x79,0x20,0x2b,0x20,0x70,0x6f,0x73,0x2e,0x79,0x3b,0xa,0x23,0x65,0x6c,0x73,0x65,0xa,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x5f,0x5f,0x67,0x6c,0x6f,0x62,0x61,0x6c,0x20,0x46,0x4c,0x4f,0x41,0x54,0x2a,0x20,0x41,0x5f,0x70,0x74,0x72,0x20,0x3d,0x20,0x69,0x6e,0x70,0x75,0x74,0x5f,0x41,0x20,0x2b,0x20,0x6f,0x66,0x66,0x73,0x65,0x74,0x2e,0x79,0x20,0x2b,0x20,0x70,0x6f,0x73,0x2e,0x79,0x20,0x2a,0x20,0x6c,0x3b,0xa,0x23,0x65,0x6e,0x64,0x69,0x66,0xa,0xa,0x23,0x69,0x66,0x20,0x54,0x52,0x41,0x4e,0x53,0x50,0x4f,0x53,0x45,0x5f,0x42,0xa,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x5f,0x5f,0x67,0x6c,0x6f,0x62,0x61,0x6c,0x20,0x46,0x4c,0x4f,0x41,0x54,0x2a,0x20,0x42,0x5f,0x70,0x74,0x72,0x20,0x3d,0x20,0x69,0x6e,0x70,0x75,0x74,0x5f,0x42,0x20,0x2b,0x20,0x6f,0x66,0x66,0x73,0x65,0x74,0x2e,0x7a,0x20,0x2b,0x20,0x70,0x6f,0x73,0x2e,0x78,0x20,0x2a,0x20,0x6c,0x3b,0xa,0x23,0x65,0x6c,0x73,0x65,0xa,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x5f,0x5f,0x67,0x6c,0x6f,0x62,0x61,0x6c,0x20,0x46,0x4c,0x4f,0x41,0x54,0x2a,0x20,0x42,0x5f,0x70,0x74,0x72,0x20,0x3d,0x20,0x69,0x6e,0x70,0x75,0x74,0x5f,0x42,0x20,0x2b,0x20,0x6f,0x66,0x66,0x73,0x65,0x74,0x2e,0x7a,0x20,0x2b,0x20,0x70,0x6f,0x73,0x2e,0x78,0x3b,0xa,0x23,0x65,0x6e,0x64,0x69,0x66,0xa,0xa,0x23,0x69,0x66,0x64,0x65,0x66,0x20,0x42,0x49,0x41,0x53,0xa,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x46,0x4c,0x4f,0x41,0x54,0x20,0x76,0x61,0x6c,0x75,0x65,0x20,0x3d,0x20,0x69,0x6e,0x70,0x75,0x74,0x5f,0x43,0x5b,0x6f,0x66,0x66,0x73,0x65,0x74,0x2e,0x77,0x20,0x2b,0x20,0x70,0x6f,0x73,0x2e,0x78,0x5d,0x3b,0xa,0x23,0x65,0x6c,0x73,0x65,0xa,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x46,0x4c,0x4f,0x41,0x54,0x20,0x76,0x61,0x6c,0x75,0x65,0x20,0x3d,0x20,0x30,0x3b,0xa,0x23,0x65,0x6e,0x64,0x69,0x66,0xa,0xa,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x66,0x6f,0x72,0x28,0x69,0x6e,0x74,0x20,0x69,0x20,0x3d,0x20,0x30,0x3b,0x20,0x69,0x20,0x3c,0x20,0x6c,0x3b,0x20,0x2b,0x2b,0x69,0x29,0x7b,0xa,0x23,0x69,0x66,0x20,0x54,0x52,0x41,0x4e,0x53,0x50,0x4f,0x53,0x45,0x5f,0x41,0xa,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x46,0x4c,0x4f,0x41,0x54,0x20,0x76,0x61,0x6c,0x75,0x65,0x5f,0x61,0x20,0x3d,0x20,0x41,0x5f,0x70,0x74,0x72,0x5b,0x69,0x20,0x2a,0x20,0x65,0x5d,0x3b,0xa,0x23,0x65,0x6c,0x73,0x65,0xa,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x46,0x4c,0x4f,0x41,0x54,0x20,0x76,0x61,0x6c,0x75,0x65,0x5f,0x61,0x20,0x3d,0x20,0x41,0x5f,0x70,0x74,0x72,0x5b,0x69,0x5d,0x3b,0xa,0x23,0x65,0x6e,0x64,0x69,0x66,0xa,0xa,0x23,0x69,0x66,0x20,0x54,0x52,0x41,0x4e,0x53,0x50,0x4f,0x53,0x45,0x5f,0x42,0xa,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x46,0x4c,0x4f,0x41,0x54,0x20,0x76,0x61,0x6c,0x75,0x65,0x5f,0x62,0x20,0x3d,0x20,0x42,0x5f,0x70,0x74,0x72,0x5b,0x69,0x5d,0x3b,0xa,0x23,0x65,0x6c,0x73,0x65,0xa,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x46,0x4c,0x4f,0x41,0x54,0x20,0x76,0x61,0x6c,0x75,0x65,0x5f,0x62,0x20,0x3d,0x20,0x42,0x5f,0x70,0x74,0x72,0x5b,0x69,0x20,0x2a,0x20,0x68,0x5d,0x3b,0xa,0x23,0x65,0x6e,0x64,0x69,0x66,0xa,0xa,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x76,0x61,0x6c,0x75,0x65,0x20,0x3d,0x20,0x6d,0x61,0x64,0x28,0x76,0x61,0x6c,0x75,0x65,0x5f,0x61,0x2c,0x20,0x76,0x61,0x6c,0x75,0x65,0x5f,0x62,0x2c,0x20,0x76,0x61,0x6c,0x75,0x65,0x29,0x3b,0xa,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x7d,0xa,0xa,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x6f,0x75,0x74,0x70,0x75,0x74,0x5b,0x6f,0x66,0x66,0x73,0x65,0x74,0x2e,0x78,0x20,0x2b,0x20,0x70,0x6f,0x73,0x2e,0x79,0x20,0x2a,0x20,0x68,0x20,0x2b,0x20,0x70,0x6f,0x73,0x2e,0x78,0x5d,0x20,0x3d,0x20,0x76,0x61,0x6c,0x75,0x65,0x3b,0xa,0x20,0x20,0x20,0x20,0x7d,0xa,0x7d,0xa,0xa,0x5f,0x5f,0x6b,0x65,0x72,0x6e,0x65,0x6c,0x20,0x76,0x6f,0x69,0x64,0x20,0x74,0x69,0x6c,0x65,0x5f,0x62,0x75,0x66,0x28,0x5f,0x5f,0x70,0x72,0x69,0x76,0x61,0x74,0x65,0x20,0x69,0x6e,0x74,0x20,0x67,0x6c,0x6f,0x62,0x61,0x6c,0x5f,0x64,0x69,0x6d,0x30,0x2c,0x20,0x5f,0x5f,0x70,0x72,0x69,0x76,0x61,0x74,0x65,0x20,0x69,0x6e,0x74,0x20,0x67,0x6c,0x6f,0x62,0x61,0x6c,0x5f,0x64,0x69,0x6d,0x31,0x2c,0x20,0x5f,0x5f,0x70,0x72,0x69,0x76,0x61,0x74,0x65,0x20,0x69,0x6e,0x74,0x20,0x67,0x6c,0x6f,0x62,0x61,0x6c,0x5f,0x64,0x69,0x6d,0x32,0x2c,0xa,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x5f,0x5f,0x67,0x6c,0x6f,0x62,0x61,0x6c,0x20,0x46,0x4c,0x4f,0x41,0x54,0x2a,0x20,0x69,0x6e,0x70,0x75,0x74,0x2c,0x20,0x5f,0x5f,0x67,0x6c,0x6f,0x62,0x61,0x6c,0x20,0x46,0x4c,0x4f,0x41,0x54,0x2a,0x20,0x6f,0x75,0x74,0x70,0x75,0x74,0x2c,0xa,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x5f,0x5f,0x70,0x72,0x69,0x76,0x61,0x74,0x65,0x20,0x63,0x6f,0x6e,0x73,0x74,0x20,0x69,0x6e,0x74,0x20,0x77,0x69,0x64,0x74,0x68,0x2c,0xa,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x5f,0x5f,0x70,0x72,0x69,0x76,0x61,0x74,0x65,0x20,0x63,0x6f,0x6e,0x73,0x74,0x20,0x69,0x6e,0x74,0x20,0x68,0x65,0x69,0x67,0x68,0x74,0x2c,0xa,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x5f,0x5f,0x70,0x72,0x69,0x76,0x61,0x74,0x65,0x20,0x63,0x6f,0x6e,0x73,0x74,0x20,0x69,0x6e,0x74,0x20,0x63,0x68,0x61,0x6e,0x6e,0x65,0x6c,0x29,0x7b,0xa,0x20,0x20,0x20,0x20,0x69,0x6e,0x74,0x33,0x20,0x70,0x6f,0x73,0x20,0x3d,0x20,0x28,0x69,0x6e,0x74,0x33,0x29,0x28,0x67,0x65,0x74,0x5f,0x67,0x6c,0x6f,0x62,0x61,0x6c,0x5f,0x69,0x64,0x28,0x30,0x29,0x2c,0x20,0x67,0x65,0x74,0x5f,0x67,0x6c,0x6f,0x62,0x61,0x6c,0x5f,0x69,0x64,0x28,0x31,0x29,0x2c,0x20,0x67,0x65,0x74,0x5f,0x67,0x6c,0x6f,0x62,0x61,0x6c,0x5f,0x69,0x64,0x28,0x32,0x29,0x29,0x3b,0xa,0x20,0x20,0x20,0x20,0x69,0x66,0x20,0x28,0x70,0x6f,0x73,0x2e,0x78,0x20,0x3c,0x20,0x67,0x6c,0x6f,0x62,0x61,0x6c,0x5f,0x64,0x69,0x6d,0x30,0x20,0x26,0x26,0x20,0x70,0x6f,0x73,0x2e,0x79,0x20,0x3c,0x20,0x67,0x6c,0x6f,0x62,0x61,0x6c,0x5f,0x64,0x69,0x6d,0x31,0x20,0x26,0x26,0x20,0x70,0x6f,0x73,0x2e,0x7a,0x20,0x3c,0x20,0x67,0x6c,0x6f,0x62,0x61,0x6c,0x5f,0x64,0x69,0x6d,0x32,0x29,0x20,0x7b,0xa,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x63,0x6f,0x6e,0x73,0x74,0x20,0x69,0x6e,0x74,0x20,0x77,0x20,0x3d,0x20,0x70,0x6f,0x73,0x2e,0x78,0x20,0x25,0x20,0x77,0x69,0x64,0x74,0x68,0x3b,0xa,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x63,0x6f,0x6e,0x73,0x74,0x20,0x69,0x6e,0x74,0x20,0x68,0x20,0x3d,0x20,0x70,0x6f,0x73,0x2e,0x78,0x20,0x2f,0x20,0x77,0x69,0x64,0x74,0x68,0x3b,0xa,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x63,0x6f,0x6e,0x73,0x74,0x20,0x69,0x6e,0x74,0x20,0x63,0x20,0x3d,0x20,0x70,0x6f,0x73,0x2e,0x79,0x20,0x3c,0x3c,0x20,0x32,0x3b,0xa,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x63,0x6f,0x6e,0x73,0x74,0x20,0x69,0x6e,0x74,0x20,0x78,0x5f,0x73,0x72,0x63,0x5f,0x70,0x69,0x74,0x63,0x68,0x20,0x3d,0x20,0x34,0x3b,0xa,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x63,0x6f,0x6e,0x73,0x74,0x20,0x69,0x6e,0x74,0x20,0x79,0x5f,0x73,0x72,0x63,0x5f,0x70,0x69,0x74,0x63,0x68,0x20,0x3d,0x20,0x78,0x5f,0x73,0x72,0x63,0x5f,0x70,0x69,0x74,0x63,0x68,0x20,0x2a,0x20,0x77,0x69,0x64,0x74,0x68,0x3b,0xa,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x63,0x6f,0x6e,0x73,0x74,0x20,0x69,0x6e,0x74,0x20,0x63,0x5f,0x73,0x72,0x63,0x5f,0x70,0x69,0x74,0x63,0x68,0x20,0x3d,0x20,0x79,0x5f,0x73,0x72,0x63,0x5f,0x70,0x69,0x74,0x63,0x68,0x20,0x2a,0x20,0x68,0x65,0x69,0x67,0x68,0x74,0x3b,0xa,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x63,0x6f,0x6e,0x73,0x74,0x20,0x69,0x6e,0x74,0x20,0x62,0x5f,0x73,0x72,0x63,0x5f,0x70,0x69,0x74,0x63,0x68,0x20,0x3d,0x20,0x63,0x5f,0x73,0x72,0x63,0x5f,0x70,0x69,0x74,0x63,0x68,0x20,0x2a,0x20,0x28,0x28,0x63,0x68,0x61,0x6e,0x6e,0x65,0x6c,0x20,0x2b,0x20,0x33,0x29,0x20,0x2f,0x20,0x34,0x29,0x3b,0xa,0xa,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x63,0x6f,0x6e,0x73,0x74,0x20,0x69,0x6e,0x74,0x20,0x78,0x5f,0x64,0x73,0x74,0x5f,0x70,0x69,0x74,0x63,0x68,0x20,0x3d,0x20,0x31,0x3b,0xa,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x63,0x6f,0x6e,0x73,0x74,0x20,0x69,0x6e,0x74,0x20,0x79,0x5f,0x64,0x73,0x74,0x5f,0x70,0x69,0x74,0x63,0x68,0x20,0x3d,0x20,0x78,0x5f,0x64,0x73,0x74,0x5f,0x70,0x69,0x74,0x63,0x68,0x20,0x2a,0x20,0x77,0x69,0x64,0x74,0x68,0x3b,0xa,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x63,0x6f,0x6e,0x73,0x74,0x20,0x69,0x6e,0x74,0x20,0x63,0x5f,0x64,0x73,0x74,0x5f,0x70,0x69,0x74,0x63,0x68,0x20,0x3d,0x20,0x79,0x5f,0x64,0x73,0x74,0x5f,0x70,0x69,0x74,0x63,0x68,0x20,0x2a,0x20,0x68,0x65,0x69,0x67,0x68,0x74,0x3b,0xa,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x63,0x6f,0x6e,0x73,0x74,0x20,0x69,0x6e,0x74,0x20,0x62,0x5f,0x64,0x73,0x74,0x5f,0x70,0x69,0x74,0x63,0x68,0x20,0x3d,0x20,0x63,0x5f,0x64,0x73,0x74,0x5f,0x70,0x69,0x74,0x63,0x68,0x20,0x2a,0x20,0x63,0x68,0x61,0x6e,0x6e,0x65,0x6c,0x3b,0xa,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x5f,0x5f,0x67,0x6c,0x6f,0x62,0x61,0x6c,0x20,0x46,0x4c,0x4f,0x41,0x54,0x2a,0x20,0x64,0x73,0x74,0x5f,0x70,0x74,0x72,0x20,0x3d,0x20,0x6f,0x75,0x74,0x70,0x75,0x74,0x20,0x2b,0x20,0x70,0x6f,0x73,0x2e,0x7a,0x20,0x2a,0x20,0x62,0x5f,0x64,0x73,0x74,0x5f,0x70,0x69,0x74,0x63,0x68,0x20,0x2b,0x20,0x63,0x20,0x2a,0x20,0x63,0x5f,0x64,0x73,0x74,0x5f,0x70,0x69,0x74,0x63,0x68,0x20,0x2b,0x20,0x68,0x20,0x2a,0x20,0x79,0x5f,0x64,0x73,0x74,0x5f,0x70,0x69,0x74,0x63,0x68,0x20,0x2b,0x20,0x77,0x20,0x2a,0x20,0x78,0x5f,0x64,0x73,0x74,0x5f,0x70,0x69,0x74,0x63,0x68,0x3b,0xa,0xa,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x46,0x4c,0x4f,0x41,0x54,0x34,0x20,0x76,0x61,0x6c,0x75,0x65,0x20,0x3d,0x20,0x76,0x6c,0x6f,0x61,0x64,0x34,0x28,0x30,0x2c,0x20,0x69,0x6e,0x70,0x75,0x74,0x20,0x2b,0x20,0x70,0x6f,0x73,0x2e,0x7a,0x20,0x2a,0x20,0x62,0x5f,0x73,0x72,0x63,0x5f,0x70,0x69,0x74,0x63,0x68,0x20,0x2b,0x20,0x70,0x6f,0x73,0x2e,0x79,0x20,0x2a,0x20,0x63,0x5f,0x73,0x72,0x63,0x5f,0x70,0x69,0x74,0x63,0x68,0x20,0x2b,0x20,0x68,0x20,0x2a,0x20,0x79,0x5f,0x73,0x72,0x63,0x5f,0x70,0x69,0x74,0x63,0x68,0x20,0x2b,0x20,0x77,0x20,0x2a,0x20,0x78,0x5f,0x73,0x72,0x63,0x5f,0x70,0x69,0x74,0x63,0x68,0x29,0x3b,0xa,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x64,0x73,0x74,0x5f,0x70,0x74,0x72,0x5b,0x30,0x5d,0x20,0x3d,0x20,0x76,0x61,0x6c,0x75,0x65,0x2e,0x78,0x3b,0xa,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x69,0x66,0x28,0x63,0x20,0x2b,0x20,0x31,0x20,0x3e,0x3d,0x20,0x63,0x68,0x61,0x6e,0x6e,0x65,0x6c,0x29,0x72,0x65,0x74,0x75,0x72,0x6e,0x3b,0xa,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x64,0x73,0x74,0x5f,0x70,0x74,0x72,0x5b,0x63,0x5f,0x64,0x73,0x74,0x5f,0x70,0x69,0x74,0x63,0x68,0x5d,0x20,0x3d,0x20,0x76,0x61,0x6c,0x75,0x65,0x2e,0x79,0x3b,0xa,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x69,0x66,0x28,0x63,0x20,0x2b,0x20,0x32,0x20,0x3e,0x3d,0x20,0x63,0x68,0x61,0x6e,0x6e,0x65,0x6c,0x29,0x72,0x65,0x74,0x75,0x72,0x6e,0x3b,0xa,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x64,0x73,0x74,0x5f,0x70,0x74,0x72,0x5b,0x32,0x20,0x2a,0x20,0x63,0x5f,0x64,0x73,0x74,0x5f,0x70,0x69,0x74,0x63,0x68,0x5d,0x20,0x3d,0x20,0x76,0x61,0x6c,0x75,0x65,0x2e,0x7a,0x3b,0xa,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x69,0x66,0x28,0x63,0x20,0x2b,0x20,0x33,0x20,0x3e,0x3d,0x20,0x63,0x68,0x61,0x6e,0x6e,0x65,0x6c,0x29,0x72,0x65,0x74,0x75,0x72,0x6e,0x3b,0xa,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x64,0x73,0x74,0x5f,0x70,0x74,0x72,0x5b,0x33,0x20,0x2a,0x20,0x63,0x5f,0x64,0x73,0x74,0x5f,0x70,0x69,0x74,0x63,0x68,0x5d,0x20,0x3d,0x20,0x76,0x61,0x6c,0x75,0x65,0x2e,0x77,0x3b,0xa,0x20,0x20,0x20,0x20,0x7d,0xa,0x7d,0xa,0xa,0x5f,0x5f,0x6b,0x65,0x72,0x6e,0x65,0x6c,0x20,0x76,0x6f,0x69,0x64,0x20,0x70,0x61,0x63,0x6b,0x5f,0x62,0x75,0x66,0x28,0x5f,0x5f,0x70,0x72,0x69,0x76,0x61,0x74,0x65,0x20,0x69,0x6e,0x74,0x20,0x67,0x6c,0x6f,0x62,0x61,0x6c,0x5f,0x64,0x69,0x6d,0x30,0x2c,0x20,0x5f,0x5f,0x70,0x72,0x69,0x76,0x61,0x74,0x65,0x20,0x69,0x6e,0x74,0x20,0x67,0x6c,0x6f,0x62,0x61,0x6c,0x5f,0x64,0x69,0x6d,0x31,0x2c,0x20,0x5f,0x5f,0x70,0x72,0x69,0x76,0x61,0x74,0x65,0x20,0x69,0x6e,0x74,0x20,0x67,0x6c,0x6f,0x62,0x61,0x6c,0x5f,0x64,0x69,0x6d,0x32,0x2c,0xa,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x5f,0x5f,0x67,0x6c,0x6f,0x62,0x61,0x6c,0x20,0x46,0x4c,0x4f,0x41,0x54,0x2a,0x20,0x69,0x6e,0x70,0x75,0x74,0x2c,0x20,0x5f,0x5f,0x67,0x6c,0x6f,0x62,0x61,0x6c,0x20,0x46,0x4c,0x4f,0x41,0x54,0x2a,0x20,0x6f,0x75,0x74,0x70,0x75,0x74,0x2c,0xa,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x5f,0x5f,0x70,0x72,0x69,0x76,0x61,0x74,0x65,0x20,0x63,0x6f,0x6e,0x73,0x74,0x20,0x69,0x6e,0x74,0x20,0x77,0x69,0x64,0x74,0x68,0x2c,0xa,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x5f,0x5f,0x70,0x72,0x69,0x76,0x61,0x74,0x65,0x20,0x63,0x6f,0x6e,0x73,0x74,0x20,0x69,0x6e,0x74,0x20,0x68,0x65,0x69,0x67,0x68,0x74,0x2c,0xa,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x5f,0x5f,0x70,0x72,0x69,0x76,0x61,0x74,0x65,0x20,0x63,0x6f,0x6e,0x73,0x74,0x20,0x69,0x6e,0x74,0x20,0x63,0x68,0x61,0x6e,0x6e,0x65,0x6c,0x29,0x7b,0xa,0x20,0x20,0x20,0x20,0x69,0x6e,0x74,0x33,0x20,0x70,0x6f,0x73,0x20,0x3d,0x20,0x28,0x69,0x6e,0x74,0x33,0x29,0x28,0x67,0x65,0x74,0x5f,0x67,0x6c,0x6f,0x62,0x61,0x6c,0x5f,0x69,0x64,0x28,0x30,0x29,0x2c,0x20,0x67,0x65,0x74,0x5f,0x67,0x6c,0x6f,0x62,0x61,0x6c,0x5f,0x69,0x64,0x28,0x31,0x29,0x2c,0x20,0x67,0x65,0x74,0x5f,0x67,0x6c,0x6f,0x62,0x61,0x6c,0x5f,0x69,0x64,0x28,0x32,0x29,0x29,0x3b,0xa,0x20,0x20,0x20,0x20,0x69,0x66,0x20,0x28,0x70,0x6f,0x73,0x2e,0x78,0x20,0x3c,0x20,0x67,0x6c,0x6f,0x62,0x61,0x6c,0x5f,0x64,0x69,0x6d,0x30,0x20,0x26,0x26,0x20,0x70,0x6f,0x73,0x2e,0x79,0x20,0x3c,0x20,0x67,0x6c,0x6f,0x62,0x61,0x6c,0x5f,0x64,0x69,0x6d,0x31,0x20,0x26,0x26,0x20,0x70,0x6f,0x73,0x2e,0x7a,0x20,0x3c,0x20,0x67,0x6c,0x6f,0x62,0x61,0x6c,0x5f,0x64,0x69,0x6d,0x32,0x29,0x20,0x7b,0xa,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x63,0x6f,0x6e,0x73,0x74,0x20,0x69,0x6e,0x74,0x20,0x77,0x20,0x3d,0x20,0x70,0x6f,0x73,0x2e,0x78,0x20,0x25,0x20,0x77,0x69,0x64,0x74,0x68,0x3b,0xa,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x63,0x6f,0x6e,0x73,0x74,0x20,0x69,0x6e,0x74,0x20,0x68,0x20,0x3d,0x20,0x70,0x6f,0x73,0x2e,0x78,0x20,0x2f,0x20,0x77,0x69,0x64,0x74,0x68,0x3b,0xa,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x63,0x6f,0x6e,0x73,0x74,0x20,0x69,0x6e,0x74,0x20,0x63,0x20,0x3d,0x20,0x70,0x6f,0x73,0x2e,0x79,0x20,0x3c,0x3c,0x20,0x32,0x3b,0xa,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x63,0x6f,0x6e,0x73,0x74,0x20,0x69,0x6e,0x74,0x20,0x78,0x5f,0x64,0x73,0x74,0x5f,0x70,0x69,0x74,0x63,0x68,0x20,0x3d,0x20,0x34,0x3b,0xa,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x63,0x6f,0x6e,0x73,0x74,0x20,0x69,0x6e,0x74,0x20,0x79,0x5f,0x64,0x73,0x74,0x5f,0x70,0x69,0x74,0x63,0x68,0x20,0x3d,0x20,0x78,0x5f,0x64,0x73,0x74,0x5f,0x70,0x69,0x74,0x63,0x68,0x20,0x2a,0x20,0x77,0x69,0x64,0x74,0x68,0x3b,0xa,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x63,0x6f,0x6e,0x73,0x74,0x20,0x69,0x6e,0x74,0x20,0x63,0x5f,0x64,0x73,0x74,0x5f,0x70,0x69,0x74,0x63,0x68,0x20,0x3d,0x20,0x79,0x5f,0x64,0x73,0x74,0x5f,0x70,0x69,0x74,0x63,0x68,0x20,0x2a,0x20,0x68,0x65,0x69,0x67,0x68,0x74,0x3b,0xa,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x63,0x6f,0x6e,0x73,0x74,0x20,0x69,0x6e,0x74,0x20,0x62,0x5f,0x64,0x73,0x74,0x5f,0x70,0x69,0x74,0x63,0x68,0x20,0x3d,0x20,0x63,0x5f,0x64,0x73,0x74,0x5f,0x70,0x69,0x74,0x63,0x68,0x20,0x2a,0x20,0x28,0x28,0x63,0x68,0x61,0x6e,0x6e,0x65,0x6c,0x20,0x2b,0x20,0x33,0x29,0x20,0x2f,0x20,0x34,0x29,0x3b,0xa,0xa,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x63,0x6f,0x6e,0x73,0x74,0x20,0x69,0x6e,0x74,0x20,0x78,0x5f,0x73,0x72,0x63,0x5f,0x70,0x69,0x74,0x63,0x68,0x20,0x3d,0x20,0x31,0x3b,0xa,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x63,0x6f,0x6e,0x73,0x74,0x20,0x69,0x6e,0x74,0x20,0x79,0x5f,0x73,0x72,0x63,0x5f,0x70,0x69,0x74,0x63,0x68,0x20,0x3d,0x20,0x78,0x5f,0x73,0x72,0x63,0x5f,0x70,0x69,0x74,0x63,0x68,0x20,0x2a,0x20,0x77,0x69,0x64,0x74,0x68,0x3b,0xa,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x63,0x6f,0x6e,0x73,0x74,0x20,0x69,0x6e,0x74,0x20,0x63,0x5f,0x73,0x72,0x63,0x5f,0x70,0x69,0x74,0x63,0x68,0x20,0x3d,0x20,0x79,0x5f,0x73,0x72,0x63,0x5f,0x70,0x69,0x74,0x63,0x68,0x20,0x2a,0x20,0x68,0x65,0x69,0x67,0x68,0x74,0x3b,0xa,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x63,0x6f,0x6e,0x73,0x74,0x20,0x69,0x6e,0x74,0x20,0x62,0x5f,0x73,0x72,0x63,0x5f,0x70,0x69,0x74,0x63,0x68,0x20,0x3d,0x20,0x63,0x5f,0x73,0x72,0x63,0x5f,0x70,0x69,0x74,0x63,0x68,0x20,0x2a,0x20,0x63,0x68,0x61,0x6e,0x6e,0x65,0x6c,0x3b,0xa,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x5f,0x5f,0x67,0x6c,0x6f,0x62,0x61,0x6c,0x20,0x46,0x4c,0x4f,0x41,0x54,0x2a,0x20,0x73,0x72,0x63,0x5f,0x70,0x74,0x72,0x20,0x3d,0x20,0x69,0x6e,0x70,0x75,0x74,0x20,0x2b,0x20,0x70,0x6f,0x73,0x2e,0x7a,0x20,0x2a,0x20,0x62,0x5f,0x73,0x72,0x63,0x5f,0x70,0x69,0x74,0x63,0x68,0x20,0x2b,0x20,0x63,0x20,0x2a,0x20,0x63,0x5f,0x73,0x72,0x63,0x5f,0x70,0x69,0x74,0x63,0x68,0x20,0x2b,0x20,0x68,0x20,0x2a,0x20,0x79,0x5f,0x73,0x72,0x63,0x5f,0x70,0x69,0x74,0x63,0x68,0x20,0x2b,0x20,0x77,0x20,0x2a,0x20,0x78,0x5f,0x73,0x72,0x63,0x5f,0x70,0x69,0x74,0x63,0x68,0x3b,0xa,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x46,0x4c,0x4f,0x41,0x54,0x34,0x20,0x76,0x61,0x6c,0x75,0x65,0x20,0x3d,0x20,0x28,0x46,0x4c,0x4f,0x41,0x54,0x34,0x29,0x30,0x3b,0xa,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x46,0x4c,0x4f,0x41,0x54,0x20,0x2a,0x76,0x61,0x6c,0x75,0x65,0x5f,0x70,0x74,0x72,0x20,0x3d,0x20,0x28,0x46,0x4c,0x4f,0x41,0x54,0x2a,0x29,0x26,0x76,0x61,0x6c,0x75,0x65,0x3b,0xa,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x66,0x6f,0x72,0x28,0x69,0x6e,0x74,0x20,0x69,0x20,0x3d,0x20,0x30,0x3b,0x20,0x69,0x20,0x3c,0x20,0x34,0x20,0x26,0x26,0x20,0x28,0x69,0x20,0x2b,0x20,0x63,0x20,0x3c,0x20,0x63,0x68,0x61,0x6e,0x6e,0x65,0x6c,0x29,0x3b,0x20,0x2b,0x2b,0x69,0x29,0x7b,0xa,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x76,0x61,0x6c,0x75,0x65,0x5f,0x70,0x74,0x72,0x5b,0x69,0x5d,0x20,0x3d,0x20,0x73,0x72,0x63,0x5f,0x70,0x74,0x72,0x5b,0x69,0x20,0x2a,0x20,0x63,0x5f,0x73,0x72,0x63,0x5f,0x70,0x69,0x74,0x63,0x68,0x5d,0x3b,0xa,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x7d,0xa,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x76,0x73,0x74,0x6f,0x72,0x65,0x34,0x28,0x76,0x61,0x6c,0x75,0x65,0x2c,0x20,0x30,0x2c,0x20,0x6f,0x75,0x74,0x70,0x75,0x74,0x20,0x2b,0x20,0x70,0x6f,0x73,0x2e,0x7a,0x20,0x2a,0x20,0x62,0x5f,0x64,0x73,0x74,0x5f,0x70,0x69,0x74,0x63,0x68,0x20,0x2b,0x20,0x70,0x6f,0x73,0x2e,0x79,0x20,0x2a,0x20,0x63,0x5f,0x64,0x73,0x74,0x5f,0x70,0x69,0x74,0x63,0x68,0x20,0x2b,0x20,0x68,0x20,0x2a,0x20,0x79,0x5f,0x64,0x73,0x74,0x5f,0x70,0x69,0x74,0x63,0x68,0x20,0x2b,0x20,0x77,0x20,0x2a,0x20,0x78,0x5f,0x64,0x73,0x74,0x5f,0x70,0x69,0x74,0x63,0x68,0x29,0x3b,0xa,0x20,0x20,0x20,0x20,0x7d,0xa,0x7d,0xa,0xa,0x5f,0x5f,0x6b,0x65,0x72,0x6e,0x65,0x6c,0x20,0x76,0x6f,0x69,0x64,0x20,0x62,0x61,0x74,0x63,0x68,0x5f,0x67,0x61,0x74,0x68,0x65,0x72,0x5f,0x62,0x75,0x66,0x28,0x5f,0x5f,0x70,0x72,0x69,0x76,0x61,0x74,0x65,0x20,0x69,0x6e,0x74,0x20,0x67,0x6c,0x6f,0x62,0x61,0x6c,0x5f,0x64,0x69,0x6d,0x30,0x2c,0x20,0x5f,0x5f,0x70,0x72,0x69,0x76,0x61,0x74,0x65,0x20,0x69,0x6e,0x74,0x20,0x67,0x6c,0x6f,0x62,0x61,0x6c,0x5f,0x64,0x69,0x6d,0x31,0x2c,0x20,0x5f,0x5f,0x70,0x72,0x69,0x76,0x61,0x74,0x65,0x20,0x69,0x6e,0x74,0x20,0x67,0x6c,0x6f,0x62,0x61,0x6c,0x5f,0x64,0x69,0x6d,0x32,0x2c,0xa,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x5f,0x5f,0x67,0x6c,0x6f,0x62,0x61,0x6c,0x20,0x46,0x4c,0x4f,0x41,0x54,0x2a,0x20,0x6f,0x75,0x74,0x70,0x75,0x74,0x2c,0x20,0x5f,0x5f,0x67,0x6c,0x6f,0x62,0x61,0x6c,0x20,0x46,0x4c,0x4f,0x41,0x54,0x2a,0x20,0x69,0x6e,0x70,0x75,0x74,0x2c,0xa,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x5f,0x5f,0x67,0x6c,0x6f,0x62,0x61,0x6c,0x20,0x46,0x4c,0x4f,0x41,0x54,0x2a,0x20,0x6f,0x66,0x66,0x73,0x65,0x74,0x5f,0x64,0x73,0x74,0x2c,0x20,0x5f,0x5f,0x67,0x6c,0x6f,0x62,0x61,0x6c,0x20,0x46,0x4c,0x4f,0x41,0x54,0x2a,0x20,0x6f,0x66,0x66,0x73,0x65,0x74,0x5f,0x73,0x72,0x63,0x2c,0xa,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x5f,0x5f,0x70,0x72,0x69,0x76,0x61,0x74,0x65,0x20,0x63,0x6f,0x6e,0x73,0x74,0x20,0x69,0x6e,0x74,0x20,0x78,0x5f,0x73,0x69,0x7a,0x65,0x2c,0xa,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x5f,0x5f,0x70,0x72,0x69,0x76,0x61,0x74,0x65,0x20,0x63,0x6f,0x6e,0x73,0x74,0x20,0x69,0x6e,0x74,0x34,0x20,0x73,0x74,0x72,0x69,0x64,0x65,0x5f,0x73,0x72,0x63,0x2c,0xa,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x5f,0x5f,0x70,0x72,0x69,0x76,0x61,0x74,0x65,0x20,0x63,0x6f,0x6e,0x73,0x74,0x20,0x69,0x6e,0x74,0x34,0x20,0x73,0x74,0x72,0x69,0x64,0x65,0x5f,0x64,0x73,0x74,0x2c,0xa,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x5f,0x5f,0x70,0x72,0x69,0x76,0x61,0x74,0x65,0x20,0x63,0x6f,0x6e,0x73,0x74,0x20,0x69,0x6e,0x74,0x32,0x20,0x73,0x74,0x65,0x70,0x73,0x2c,0xa,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x5f,0x5f,0x70,0x72,0x69,0x76,0x61,0x74,0x65,0x20,0x63,0x6f,0x6e,0x73,0x74,0x20,0x69,0x6e,0x74,0x32,0x20,0x69,0x74,0x65,0x72,0x73,0x29,0x20,0x7b,0xa,0x20,0x20,0x20,0x20,0x69,0x6e,0x74,0x33,0x20,0x70,0x6f,0x73,0x20,0x3d,0x20,0x28,0x69,0x6e,0x74,0x33,0x29,0x28,0x67,0x65,0x74,0x5f,0x67,0x6c,0x6f,0x62,0x61,0x6c,0x5f,0x69,0x64,0x28,0x30,0x29,0x2c,0x20,0x67,0x65,0x74,0x5f,0x67,0x6c,0x6f,0x62,0x61,0x6c,0x5f,0x69,0x64,0x28,0x31,0x29,0x2c,0x20,0x67,0x65,0x74,0x5f,0x67,0x6c,0x6f,0x62,0x61,0x6c,0x5f,0x69,0x64,0x28,0x32,0x29,0x29,0x3b,0xa,0x20,0x20,0x20,0x20,0xa,0x20,0x20,0x20,0x20,0x69,0x66,0x20,0x28,0x70,0x6f,0x73,0x2e,0x78,0x20,0x3c,0x20,0x67,0x6c,0x6f,0x62,0x61,0x6c,0x5f,0x64,0x69,0x6d,0x30,0x20,0x26,0x26,0x20,0x70,0x6f,0x73,0x2e,0x79,0x20,0x3c,0x20,0x67,0x6c,0x6f,0x62,0x61,0x6c,0x5f,0x64,0x69,0x6d,0x31,0x20,0x26,0x26,0x20,0x70,0x6f,0x73,0x2e,0x7a,0x20,0x3c,0x20,0x67,0x6c,0x6f,0x62,0x61,0x6c,0x5f,0x64,0x69,0x6d,0x32,0x29,0x20,0x7b,0xa,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0xa,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x69,0x6e,0x74,0x20,0x78,0x20,0x3d,0x20,0x70,0x6f,0x73,0x2e,0x78,0x20,0x25,0x20,0x78,0x5f,0x73,0x69,0x7a,0x65,0x3b,0xa,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x69,0x6e,0x74,0x20,0x79,0x20,0x3d,0x20,0x70,0x6f,0x73,0x2e,0x78,0x20,0x2f,0x20,0x78,0x5f,0x73,0x69,0x7a,0x65,0x3b,0xa,0xa,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x69,0x6e,0x74,0x32,0x20,0x69,0x6e,0x64,0x65,0x78,0x20,0x3d,0x20,0x28,0x69,0x6e,0x74,0x32,0x29,0x28,0x70,0x6f,0x73,0x2e,0x7a,0x2c,0x20,0x70,0x6f,0x73,0x2e,0x7a,0x29,0x3b,0xa,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x69,0x66,0x20,0x28,0x69,0x74,0x65,0x72,0x73,0x2e,0x78,0x20,0x3e,0x3d,0x20,0x30,0x29,0x20,0x7b,0xa,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x69,0x6e,0x64,0x65,0x78,0x2e,0x78,0x20,0x3d,0x20,0x28,0x69,0x6e,0x74,0x29,0x28,0x6f,0x66,0x66,0x73,0x65,0x74,0x5f,0x64,0x73,0x74,0x5b,0x70,0x6f,0x73,0x2e,0x7a,0x5d,0x29,0x3b,0xa,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x7d,0xa,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x69,0x66,0x20,0x28,0x69,0x74,0x65,0x72,0x73,0x2e,0x79,0x20,0x3e,0x3d,0x20,0x30,0x29,0x20,0x7b,0xa,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x69,0x6e,0x64,0x65,0x78,0x2e,0x79,0x20,0x3d,0x20,0x28,0x69,0x6e,0x74,0x29,0x28,0x6f,0x66,0x66,0x73,0x65,0x74,0x5f,0x73,0x72,0x63,0x5b,0x70,0x6f,0x73,0x2e,0x7a,0x5d,0x29,0x3b,0xa,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x7d,0xa,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x69,0x6e,0x74,0x32,0x20,0x6f,0x66,0x66,0x73,0x65,0x74,0x20,0x3d,0x20,0x69,0x6e,0x64,0x65,0x78,0x20,0x2a,0x20,0x73,0x74,0x65,0x70,0x73,0x3b,0xa,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x6f,0x75,0x74,0x70,0x75,0x74,0x5b,0x6f,0x66,0x66,0x73,0x65,0x74,0x2e,0x78,0x20,0x2b,0x20,0x73,0x74,0x72,0x69,0x64,0x65,0x5f,0x64,0x73,0x74,0x2e,0x77,0x20,0x2b,0x20,0x78,0x20,0x2a,0x20,0x73,0x74,0x72,0x69,0x64,0x65,0x5f,0x64,0x73,0x74,0x2e,0x78,0x20,0x2b,0x20,0x79,0x20,0x2a,0x20,0x73,0x74,0x72,0x69,0x64,0x65,0x5f,0x64,0x73,0x74,0x2e,0x79,0x20,0x2b,0x20,0x70,0x6f,0x73,0x2e,0x79,0x20,0x2a,0x20,0x73,0x74,0x72,0x69,0x64,0x65,0x5f,0x64,0x73,0x74,0x2e,0x7a,0x5d,0x20,0x3d,0x20,0x69,0x6e,0x70,0x75,0x74,0x5b,0x6f,0x66,0x66,0x73,0x65,0x74,0x2e,0x79,0x20,0x2b,0x20,0x73,0x74,0x72,0x69,0x64,0x65,0x5f,0x73,0x72,0x63,0x2e,0x77,0x20,0x2b,0x20,0x78,0x20,0x2a,0x20,0x73,0x74,0x72,0x69,0x64,0x65,0x5f,0x73,0x72,0x63,0x2e,0x78,0x20,0x2b,0x20,0x79,0x20,0x2a,0x20,0x73,0x74,0x72,0x69,0x64,0x65,0x5f,0x73,0x72,0x63,0x2e,0x79,0x20,0x2b,0x20,0x70,0x6f,0x73,0x2e,0x79,0x20,0x2a,0x20,0x73,0x74,0x72,0x69,0x64,0x65,0x5f,0x73,0x72,0x63,0x2e,0x7a,0x5d,0x3b,0xa,0x20,0x20,0x20,0x20,0x7d,0xa,0x7d,0xa, } + }, +#endif { "matmul", { 0x23,0x69,0x66,0x64,0x65,0x66,0x20,0x4d,0x4e,0x4e,0x5f,0x53,0x55,0x50,0x50,0x4f,0x52,0x54,0x5f,0x46,0x50,0x31,0x36,0xa,0x23,0x70,0x72,0x61,0x67,0x6d,0x61,0x20,0x4f,0x50,0x45,0x4e,0x43,0x4c,0x20,0x45,0x58,0x54,0x45,0x4e,0x53,0x49,0x4f,0x4e,0x20,0x63,0x6c,0x5f,0x6b,0x68,0x72,0x5f,0x66,0x70,0x31,0x36,0x20,0x3a,0x20,0x65,0x6e,0x61,0x62,0x6c,0x65,0xa,0x23,0x65,0x6e,0x64,0x69,0x66,0xa,0xa,0x23,0x64,0x65,0x66,0x69,0x6e,0x65,0x20,0x47,0x4c,0x4f,0x42,0x41,0x4c,0x5f,0x53,0x49,0x5a,0x45,0x5f,0x32,0x5f,0x44,0x49,0x4d,0x53,0x20,0x5c,0xa,0x5f,0x5f,0x70,0x72,0x69,0x76,0x61,0x74,0x65,0x20,0x63,0x6f,0x6e,0x73,0x74,0x20,0x69,0x6e,0x74,0x20,0x67,0x6c,0x6f,0x62,0x61,0x6c,0x5f,0x73,0x69,0x7a,0x65,0x5f,0x64,0x69,0x6d,0x30,0x2c,0x20,0x5f,0x5f,0x70,0x72,0x69,0x76,0x61,0x74,0x65,0x20,0x63,0x6f,0x6e,0x73,0x74,0x20,0x69,0x6e,0x74,0x20,0x67,0x6c,0x6f,0x62,0x61,0x6c,0x5f,0x73,0x69,0x7a,0x65,0x5f,0x64,0x69,0x6d,0x31,0x2c,0xa,0xa,0x23,0x64,0x65,0x66,0x69,0x6e,0x65,0x20,0x44,0x45,0x41,0x4c,0x5f,0x4e,0x4f,0x4e,0x5f,0x55,0x4e,0x49,0x46,0x4f,0x52,0x4d,0x5f,0x44,0x49,0x4d,0x32,0x28,0x69,0x6e,0x70,0x75,0x74,0x31,0x2c,0x20,0x69,0x6e,0x70,0x75,0x74,0x32,0x29,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x5c,0xa,0x69,0x66,0x20,0x28,0x69,0x6e,0x70,0x75,0x74,0x31,0x20,0x3e,0x3d,0x20,0x67,0x6c,0x6f,0x62,0x61,0x6c,0x5f,0x73,0x69,0x7a,0x65,0x5f,0x64,0x69,0x6d,0x30,0x20,0x7c,0x7c,0x20,0x69,0x6e,0x70,0x75,0x74,0x32,0x20,0x3e,0x3d,0x20,0x67,0x6c,0x6f,0x62,0x61,0x6c,0x5f,0x73,0x69,0x7a,0x65,0x5f,0x64,0x69,0x6d,0x31,0x29,0x20,0x7b,0x20,0x5c,0xa,0x72,0x65,0x74,0x75,0x72,0x6e,0x3b,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x5c,0xa,0x7d,0xa,0xa,0x5f,0x5f,0x63,0x6f,0x6e,0x73,0x74,0x61,0x6e,0x74,0x20,0x73,0x61,0x6d,0x70,0x6c,0x65,0x72,0x5f,0x74,0x20,0x53,0x41,0x4d,0x50,0x4c,0x45,0x52,0x20,0x3d,0x20,0x43,0x4c,0x4b,0x5f,0x4e,0x4f,0x52,0x4d,0x41,0x4c,0x49,0x5a,0x45,0x44,0x5f,0x43,0x4f,0x4f,0x52,0x44,0x53,0x5f,0x46,0x41,0x4c,0x53,0x45,0x20,0x7c,0x20,0x43,0x4c,0x4b,0x5f,0x41,0x44,0x44,0x52,0x45,0x53,0x53,0x5f,0x43,0x4c,0x41,0x4d,0x50,0x20,0x7c,0x20,0x43,0x4c,0x4b,0x5f,0x46,0x49,0x4c,0x54,0x45,0x52,0x5f,0x4e,0x45,0x41,0x52,0x45,0x53,0x54,0x3b,0xa,0xa,0x5f,0x5f,0x6b,0x65,0x72,0x6e,0x65,0x6c,0x20,0x76,0x6f,0x69,0x64,0x20,0x6d,0x61,0x74,0x6d,0x75,0x6c,0x28,0x47,0x4c,0x4f,0x42,0x41,0x4c,0x5f,0x53,0x49,0x5a,0x45,0x5f,0x32,0x5f,0x44,0x49,0x4d,0x53,0x20,0x5f,0x5f,0x72,0x65,0x61,0x64,0x5f,0x6f,0x6e,0x6c,0x79,0x20,0x69,0x6d,0x61,0x67,0x65,0x32,0x64,0x5f,0x74,0x20,0x69,0x6e,0x70,0x75,0x74,0x5f,0x61,0x2c,0xa,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x5f,0x5f,0x72,0x65,0x61,0x64,0x5f,0x6f,0x6e,0x6c,0x79,0x20,0x69,0x6d,0x61,0x67,0x65,0x32,0x64,0x5f,0x74,0x20,0x69,0x6e,0x70,0x75,0x74,0x5f,0x62,0x2c,0xa,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x23,0x69,0x66,0x64,0x65,0x66,0x20,0x42,0x49,0x41,0x53,0xa,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x5f,0x5f,0x72,0x65,0x61,0x64,0x5f,0x6f,0x6e,0x6c,0x79,0x20,0x69,0x6d,0x61,0x67,0x65,0x32,0x64,0x5f,0x74,0x20,0x69,0x6e,0x70,0x75,0x74,0x5f,0x63,0x2c,0xa,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x23,0x65,0x6e,0x64,0x69,0x66,0xa,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x5f,0x5f,0x77,0x72,0x69,0x74,0x65,0x5f,0x6f,0x6e,0x6c,0x79,0x20,0x69,0x6d,0x61,0x67,0x65,0x32,0x64,0x5f,0x74,0x20,0x6f,0x75,0x74,0x70,0x75,0x74,0x5f,0x63,0x2c,0x20,0x5f,0x5f,0x70,0x72,0x69,0x76,0x61,0x74,0x65,0x20,0x63,0x6f,0x6e,0x73,0x74,0x20,0x69,0x6e,0x74,0x20,0x63,0x68,0x61,0x6e,0x6e,0x65,0x6c,0x73,0x2c,0xa,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x5f,0x5f,0x70,0x72,0x69,0x76,0x61,0x74,0x65,0x20,0x63,0x6f,0x6e,0x73,0x74,0x20,0x69,0x6e,0x74,0x20,0x63,0x68,0x61,0x6e,0x6e,0x65,0x6c,0x5f,0x62,0x6c,0x6f,0x63,0x6b,0x73,0x29,0x20,0x7b,0xa,0x20,0x20,0x20,0x20,0x63,0x6f,0x6e,0x73,0x74,0x20,0x69,0x6e,0x74,0x20,0x77,0x69,0x64,0x74,0x68,0x5f,0x62,0x6c,0x6f,0x63,0x6b,0x73,0x5f,0x69,0x64,0x78,0x20,0x3d,0x20,0x67,0x65,0x74,0x5f,0x67,0x6c,0x6f,0x62,0x61,0x6c,0x5f,0x69,0x64,0x28,0x30,0x29,0x3b,0xa,0x20,0x20,0x20,0x20,0x63,0x6f,0x6e,0x73,0x74,0x20,0x69,0x6e,0x74,0x20,0x68,0x65,0x69,0x67,0x68,0x74,0x5f,0x69,0x64,0x78,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x3d,0x20,0x67,0x65,0x74,0x5f,0x67,0x6c,0x6f,0x62,0x61,0x6c,0x5f,0x69,0x64,0x28,0x31,0x29,0x3b,0xa,0xa,0x20,0x20,0x20,0x20,0x44,0x45,0x41,0x4c,0x5f,0x4e,0x4f,0x4e,0x5f,0x55,0x4e,0x49,0x46,0x4f,0x52,0x4d,0x5f,0x44,0x49,0x4d,0x32,0x28,0x77,0x69,0x64,0x74,0x68,0x5f,0x62,0x6c,0x6f,0x63,0x6b,0x73,0x5f,0x69,0x64,0x78,0x2c,0x20,0x68,0x65,0x69,0x67,0x68,0x74,0x5f,0x69,0x64,0x78,0x29,0x3b,0xa,0x20,0x20,0x20,0x20,0x46,0x4c,0x4f,0x41,0x54,0x34,0x20,0x61,0x3b,0xa,0x20,0x20,0x20,0x20,0x46,0x4c,0x4f,0x41,0x54,0x34,0x20,0x62,0x30,0x20,0x3d,0x20,0x30,0x2c,0x20,0x62,0x31,0x20,0x3d,0x20,0x30,0x2c,0x20,0x62,0x32,0x20,0x3d,0x20,0x30,0x2c,0x20,0x62,0x33,0x20,0x3d,0x20,0x30,0x3b,0xa,0xa,0x20,0x20,0x20,0x20,0x23,0x69,0x66,0x64,0x65,0x66,0x20,0x42,0x49,0x41,0x53,0xa,0x20,0x20,0x20,0x20,0x46,0x4c,0x4f,0x41,0x54,0x34,0x20,0x74,0x65,0x6d,0x70,0x20,0x3d,0x20,0x52,0x49,0x5f,0x46,0x28,0x69,0x6e,0x70,0x75,0x74,0x5f,0x63,0x2c,0x20,0x53,0x41,0x4d,0x50,0x4c,0x45,0x52,0x2c,0x20,0x28,0x69,0x6e,0x74,0x32,0x29,0x28,0x77,0x69,0x64,0x74,0x68,0x5f,0x62,0x6c,0x6f,0x63,0x6b,0x73,0x5f,0x69,0x64,0x78,0x2c,0x20,0x30,0x29,0x29,0x3b,0xa,0x20,0x20,0x20,0x20,0x46,0x4c,0x4f,0x41,0x54,0x20,0x72,0x65,0x73,0x75,0x6c,0x74,0x30,0x20,0x3d,0x20,0x74,0x65,0x6d,0x70,0x2e,0x78,0x3b,0xa,0x20,0x20,0x20,0x20,0x46,0x4c,0x4f,0x41,0x54,0x20,0x72,0x65,0x73,0x75,0x6c,0x74,0x31,0x20,0x3d,0x20,0x74,0x65,0x6d,0x70,0x2e,0x79,0x3b,0xa,0x20,0x20,0x20,0x20,0x46,0x4c,0x4f,0x41,0x54,0x20,0x72,0x65,0x73,0x75,0x6c,0x74,0x32,0x20,0x3d,0x20,0x74,0x65,0x6d,0x70,0x2e,0x7a,0x3b,0xa,0x20,0x20,0x20,0x20,0x46,0x4c,0x4f,0x41,0x54,0x20,0x72,0x65,0x73,0x75,0x6c,0x74,0x33,0x20,0x3d,0x20,0x74,0x65,0x6d,0x70,0x2e,0x77,0x3b,0xa,0x20,0x20,0x20,0x20,0x23,0x65,0x6c,0x73,0x65,0xa,0x20,0x20,0x20,0x20,0x46,0x4c,0x4f,0x41,0x54,0x20,0x72,0x65,0x73,0x75,0x6c,0x74,0x30,0x20,0x3d,0x20,0x30,0x3b,0xa,0x20,0x20,0x20,0x20,0x46,0x4c,0x4f,0x41,0x54,0x20,0x72,0x65,0x73,0x75,0x6c,0x74,0x31,0x20,0x3d,0x20,0x30,0x3b,0xa,0x20,0x20,0x20,0x20,0x46,0x4c,0x4f,0x41,0x54,0x20,0x72,0x65,0x73,0x75,0x6c,0x74,0x32,0x20,0x3d,0x20,0x30,0x3b,0xa,0x20,0x20,0x20,0x20,0x46,0x4c,0x4f,0x41,0x54,0x20,0x72,0x65,0x73,0x75,0x6c,0x74,0x33,0x20,0x3d,0x20,0x30,0x3b,0xa,0x20,0x20,0x20,0x20,0x23,0x65,0x6e,0x64,0x69,0x66,0xa,0xa,0x20,0x20,0x20,0x20,0x66,0x6f,0x72,0x20,0x28,0x73,0x68,0x6f,0x72,0x74,0x20,0x70,0x6f,0x73,0x20,0x3d,0x20,0x30,0x3b,0x20,0x70,0x6f,0x73,0x20,0x3c,0x20,0x63,0x68,0x61,0x6e,0x6e,0x65,0x6c,0x5f,0x62,0x6c,0x6f,0x63,0x6b,0x73,0x3b,0x20,0x70,0x6f,0x73,0x20,0x2b,0x3d,0x20,0x31,0x29,0x20,0x7b,0xa,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x61,0x20,0x3d,0x20,0x52,0x49,0x5f,0x46,0x28,0x69,0x6e,0x70,0x75,0x74,0x5f,0x61,0x2c,0x20,0x53,0x41,0x4d,0x50,0x4c,0x45,0x52,0x2c,0x20,0x28,0x69,0x6e,0x74,0x32,0x29,0x28,0x70,0x6f,0x73,0x2c,0x20,0x68,0x65,0x69,0x67,0x68,0x74,0x5f,0x69,0x64,0x78,0x29,0x29,0x3b,0xa,0xa,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x73,0x68,0x6f,0x72,0x74,0x20,0x72,0x65,0x6d,0x61,0x69,0x6e,0x20,0x3d,0x20,0x28,0x70,0x6f,0x73,0x20,0x2b,0x20,0x31,0x29,0x20,0x2a,0x20,0x34,0x20,0x2d,0x20,0x63,0x68,0x61,0x6e,0x6e,0x65,0x6c,0x73,0x3b,0xa,0xa,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x62,0x30,0x20,0x3d,0x20,0x52,0x49,0x5f,0x46,0x28,0x69,0x6e,0x70,0x75,0x74,0x5f,0x62,0x2c,0x20,0x53,0x41,0x4d,0x50,0x4c,0x45,0x52,0x2c,0x20,0x28,0x69,0x6e,0x74,0x32,0x29,0x28,0x77,0x69,0x64,0x74,0x68,0x5f,0x62,0x6c,0x6f,0x63,0x6b,0x73,0x5f,0x69,0x64,0x78,0x2c,0x20,0x70,0x6f,0x73,0x20,0x2a,0x20,0x34,0x29,0x29,0x3b,0xa,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x62,0x31,0x20,0x3d,0x20,0x52,0x49,0x5f,0x46,0x28,0x69,0x6e,0x70,0x75,0x74,0x5f,0x62,0x2c,0x20,0x53,0x41,0x4d,0x50,0x4c,0x45,0x52,0x2c,0x20,0x28,0x69,0x6e,0x74,0x32,0x29,0x28,0x77,0x69,0x64,0x74,0x68,0x5f,0x62,0x6c,0x6f,0x63,0x6b,0x73,0x5f,0x69,0x64,0x78,0x2c,0x20,0x70,0x6f,0x73,0x20,0x2a,0x20,0x34,0x20,0x2b,0x20,0x31,0x29,0x29,0x3b,0xa,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x62,0x32,0x20,0x3d,0x20,0x52,0x49,0x5f,0x46,0x28,0x69,0x6e,0x70,0x75,0x74,0x5f,0x62,0x2c,0x20,0x53,0x41,0x4d,0x50,0x4c,0x45,0x52,0x2c,0x20,0x28,0x69,0x6e,0x74,0x32,0x29,0x28,0x77,0x69,0x64,0x74,0x68,0x5f,0x62,0x6c,0x6f,0x63,0x6b,0x73,0x5f,0x69,0x64,0x78,0x2c,0x20,0x70,0x6f,0x73,0x20,0x2a,0x20,0x34,0x20,0x2b,0x20,0x32,0x29,0x29,0x3b,0xa,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x62,0x33,0x20,0x3d,0x20,0x52,0x49,0x5f,0x46,0x28,0x69,0x6e,0x70,0x75,0x74,0x5f,0x62,0x2c,0x20,0x53,0x41,0x4d,0x50,0x4c,0x45,0x52,0x2c,0x20,0x28,0x69,0x6e,0x74,0x32,0x29,0x28,0x77,0x69,0x64,0x74,0x68,0x5f,0x62,0x6c,0x6f,0x63,0x6b,0x73,0x5f,0x69,0x64,0x78,0x2c,0x20,0x70,0x6f,0x73,0x20,0x2a,0x20,0x34,0x20,0x2b,0x20,0x33,0x29,0x29,0x3b,0xa,0xa,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x69,0x66,0x20,0x28,0x72,0x65,0x6d,0x61,0x69,0x6e,0x20,0x3d,0x3d,0x20,0x33,0x29,0x20,0x7b,0xa,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x62,0x31,0x20,0x3d,0x20,0x30,0x3b,0xa,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x62,0x32,0x20,0x3d,0x20,0x30,0x3b,0xa,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x62,0x33,0x20,0x3d,0x20,0x30,0x3b,0xa,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x7d,0x20,0x65,0x6c,0x73,0x65,0x20,0x69,0x66,0x20,0x28,0x72,0x65,0x6d,0x61,0x69,0x6e,0x20,0x3d,0x3d,0x20,0x32,0x29,0x20,0x7b,0xa,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x62,0x32,0x20,0x3d,0x20,0x30,0x3b,0xa,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x62,0x33,0x20,0x3d,0x20,0x30,0x3b,0xa,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x7d,0x20,0x65,0x6c,0x73,0x65,0x20,0x69,0x66,0x20,0x28,0x72,0x65,0x6d,0x61,0x69,0x6e,0x20,0x3d,0x3d,0x20,0x31,0x29,0x20,0x7b,0xa,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x62,0x33,0x20,0x3d,0x20,0x30,0x3b,0xa,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x7d,0xa,0xa,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x46,0x4c,0x4f,0x41,0x54,0x34,0x20,0x62,0x74,0x6d,0x70,0x30,0x20,0x3d,0x20,0x28,0x46,0x4c,0x4f,0x41,0x54,0x34,0x29,0x28,0x62,0x30,0x2e,0x73,0x30,0x2c,0x20,0x62,0x31,0x2e,0x73,0x30,0x2c,0x20,0x62,0x32,0x2e,0x73,0x30,0x2c,0x20,0x62,0x33,0x2e,0x73,0x30,0x29,0x3b,0xa,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x46,0x4c,0x4f,0x41,0x54,0x34,0x20,0x62,0x74,0x6d,0x70,0x31,0x20,0x3d,0x20,0x28,0x46,0x4c,0x4f,0x41,0x54,0x34,0x29,0x28,0x62,0x30,0x2e,0x73,0x31,0x2c,0x20,0x62,0x31,0x2e,0x73,0x31,0x2c,0x20,0x62,0x32,0x2e,0x73,0x31,0x2c,0x20,0x62,0x33,0x2e,0x73,0x31,0x29,0x3b,0xa,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x46,0x4c,0x4f,0x41,0x54,0x34,0x20,0x62,0x74,0x6d,0x70,0x32,0x20,0x3d,0x20,0x28,0x46,0x4c,0x4f,0x41,0x54,0x34,0x29,0x28,0x62,0x30,0x2e,0x73,0x32,0x2c,0x20,0x62,0x31,0x2e,0x73,0x32,0x2c,0x20,0x62,0x32,0x2e,0x73,0x32,0x2c,0x20,0x62,0x33,0x2e,0x73,0x32,0x29,0x3b,0xa,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x46,0x4c,0x4f,0x41,0x54,0x34,0x20,0x62,0x74,0x6d,0x70,0x33,0x20,0x3d,0x20,0x28,0x46,0x4c,0x4f,0x41,0x54,0x34,0x29,0x28,0x62,0x30,0x2e,0x73,0x33,0x2c,0x20,0x62,0x31,0x2e,0x73,0x33,0x2c,0x20,0x62,0x32,0x2e,0x73,0x33,0x2c,0x20,0x62,0x33,0x2e,0x73,0x33,0x29,0x3b,0xa,0xa,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x72,0x65,0x73,0x75,0x6c,0x74,0x30,0x20,0x2b,0x3d,0x20,0x64,0x6f,0x74,0x28,0x61,0x2c,0x20,0x62,0x74,0x6d,0x70,0x30,0x29,0x3b,0xa,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x72,0x65,0x73,0x75,0x6c,0x74,0x31,0x20,0x2b,0x3d,0x20,0x64,0x6f,0x74,0x28,0x61,0x2c,0x20,0x62,0x74,0x6d,0x70,0x31,0x29,0x3b,0xa,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x72,0x65,0x73,0x75,0x6c,0x74,0x32,0x20,0x2b,0x3d,0x20,0x64,0x6f,0x74,0x28,0x61,0x2c,0x20,0x62,0x74,0x6d,0x70,0x32,0x29,0x3b,0xa,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x72,0x65,0x73,0x75,0x6c,0x74,0x33,0x20,0x2b,0x3d,0x20,0x64,0x6f,0x74,0x28,0x61,0x2c,0x20,0x62,0x74,0x6d,0x70,0x33,0x29,0x3b,0xa,0x20,0x20,0x20,0x20,0x7d,0xa,0x20,0x20,0x20,0x20,0x57,0x49,0x5f,0x46,0x28,0x6f,0x75,0x74,0x70,0x75,0x74,0x5f,0x63,0x2c,0x20,0x28,0x69,0x6e,0x74,0x32,0x29,0x28,0x77,0x69,0x64,0x74,0x68,0x5f,0x62,0x6c,0x6f,0x63,0x6b,0x73,0x5f,0x69,0x64,0x78,0x2c,0x20,0x68,0x65,0x69,0x67,0x68,0x74,0x5f,0x69,0x64,0x78,0x29,0x2c,0x20,0x28,0x46,0x4c,0x4f,0x41,0x54,0x34,0x29,0x28,0x72,0x65,0x73,0x75,0x6c,0x74,0x30,0x2c,0x20,0x72,0x65,0x73,0x75,0x6c,0x74,0x31,0x2c,0x20,0x72,0x65,0x73,0x75,0x6c,0x74,0x32,0x2c,0x20,0x72,0x65,0x73,0x75,0x6c,0x74,0x33,0x29,0x29,0x3b,0xa,0x7d,0xa,0xa,0x5f,0x5f,0x6b,0x65,0x72,0x6e,0x65,0x6c,0x20,0x76,0x6f,0x69,0x64,0x20,0x6d,0x61,0x74,0x6d,0x75,0x6c,0x5f,0x74,0x72,0x61,0x6e,0x73,0x42,0x28,0x47,0x4c,0x4f,0x42,0x41,0x4c,0x5f,0x53,0x49,0x5a,0x45,0x5f,0x32,0x5f,0x44,0x49,0x4d,0x53,0x20,0x5f,0x5f,0x72,0x65,0x61,0x64,0x5f,0x6f,0x6e,0x6c,0x79,0x20,0x69,0x6d,0x61,0x67,0x65,0x32,0x64,0x5f,0x74,0x20,0x69,0x6e,0x70,0x75,0x74,0x5f,0x61,0x2c,0xa,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x5f,0x5f,0x72,0x65,0x61,0x64,0x5f,0x6f,0x6e,0x6c,0x79,0x20,0x69,0x6d,0x61,0x67,0x65,0x32,0x64,0x5f,0x74,0x20,0x69,0x6e,0x70,0x75,0x74,0x5f,0x62,0x2c,0xa,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x23,0x69,0x66,0x64,0x65,0x66,0x20,0x42,0x49,0x41,0x53,0xa,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x5f,0x5f,0x72,0x65,0x61,0x64,0x5f,0x6f,0x6e,0x6c,0x79,0x20,0x69,0x6d,0x61,0x67,0x65,0x32,0x64,0x5f,0x74,0x20,0x69,0x6e,0x70,0x75,0x74,0x5f,0x63,0x2c,0xa,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x23,0x65,0x6e,0x64,0x69,0x66,0xa,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x5f,0x5f,0x77,0x72,0x69,0x74,0x65,0x5f,0x6f,0x6e,0x6c,0x79,0x20,0x69,0x6d,0x61,0x67,0x65,0x32,0x64,0x5f,0x74,0x20,0x6f,0x75,0x74,0x70,0x75,0x74,0x5f,0x63,0x2c,0x20,0x5f,0x5f,0x70,0x72,0x69,0x76,0x61,0x74,0x65,0x20,0x63,0x6f,0x6e,0x73,0x74,0x20,0x69,0x6e,0x74,0x20,0x63,0x68,0x61,0x6e,0x6e,0x65,0x6c,0x73,0x2c,0xa,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x5f,0x5f,0x70,0x72,0x69,0x76,0x61,0x74,0x65,0x20,0x63,0x6f,0x6e,0x73,0x74,0x20,0x69,0x6e,0x74,0x20,0x63,0x68,0x61,0x6e,0x6e,0x65,0x6c,0x5f,0x62,0x6c,0x6f,0x63,0x6b,0x73,0x29,0x20,0x7b,0xa,0x20,0x20,0x20,0x20,0x63,0x6f,0x6e,0x73,0x74,0x20,0x69,0x6e,0x74,0x20,0x77,0x69,0x64,0x74,0x68,0x5f,0x62,0x6c,0x6f,0x63,0x6b,0x73,0x5f,0x69,0x64,0x78,0x20,0x3d,0x20,0x67,0x65,0x74,0x5f,0x67,0x6c,0x6f,0x62,0x61,0x6c,0x5f,0x69,0x64,0x28,0x30,0x29,0x3b,0xa,0x20,0x20,0x20,0x20,0x63,0x6f,0x6e,0x73,0x74,0x20,0x69,0x6e,0x74,0x20,0x68,0x65,0x69,0x67,0x68,0x74,0x5f,0x69,0x64,0x78,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x3d,0x20,0x67,0x65,0x74,0x5f,0x67,0x6c,0x6f,0x62,0x61,0x6c,0x5f,0x69,0x64,0x28,0x31,0x29,0x3b,0xa,0xa,0x20,0x20,0x20,0x20,0x44,0x45,0x41,0x4c,0x5f,0x4e,0x4f,0x4e,0x5f,0x55,0x4e,0x49,0x46,0x4f,0x52,0x4d,0x5f,0x44,0x49,0x4d,0x32,0x28,0x77,0x69,0x64,0x74,0x68,0x5f,0x62,0x6c,0x6f,0x63,0x6b,0x73,0x5f,0x69,0x64,0x78,0x2c,0x20,0x68,0x65,0x69,0x67,0x68,0x74,0x5f,0x69,0x64,0x78,0x29,0x3b,0xa,0x20,0x20,0x20,0x20,0x46,0x4c,0x4f,0x41,0x54,0x34,0x20,0x61,0x3b,0xa,0x20,0x20,0x20,0x20,0x46,0x4c,0x4f,0x41,0x54,0x34,0x20,0x62,0x30,0x20,0x3d,0x20,0x30,0x2c,0x20,0x62,0x31,0x20,0x3d,0x20,0x30,0x2c,0x20,0x62,0x32,0x20,0x3d,0x20,0x30,0x2c,0x20,0x62,0x33,0x20,0x3d,0x20,0x30,0x3b,0xa,0xa,0x20,0x20,0x20,0x20,0x23,0x69,0x66,0x64,0x65,0x66,0x20,0x42,0x49,0x41,0x53,0xa,0x20,0x20,0x20,0x20,0x46,0x4c,0x4f,0x41,0x54,0x34,0x20,0x74,0x65,0x6d,0x70,0x20,0x3d,0x20,0x52,0x49,0x5f,0x46,0x28,0x69,0x6e,0x70,0x75,0x74,0x5f,0x63,0x2c,0x20,0x53,0x41,0x4d,0x50,0x4c,0x45,0x52,0x2c,0x20,0x28,0x69,0x6e,0x74,0x32,0x29,0x28,0x77,0x69,0x64,0x74,0x68,0x5f,0x62,0x6c,0x6f,0x63,0x6b,0x73,0x5f,0x69,0x64,0x78,0x2c,0x20,0x30,0x29,0x29,0x3b,0xa,0x20,0x20,0x20,0x20,0x46,0x4c,0x4f,0x41,0x54,0x20,0x72,0x65,0x73,0x75,0x6c,0x74,0x30,0x20,0x3d,0x20,0x74,0x65,0x6d,0x70,0x2e,0x78,0x3b,0xa,0x20,0x20,0x20,0x20,0x46,0x4c,0x4f,0x41,0x54,0x20,0x72,0x65,0x73,0x75,0x6c,0x74,0x31,0x20,0x3d,0x20,0x74,0x65,0x6d,0x70,0x2e,0x79,0x3b,0xa,0x20,0x20,0x20,0x20,0x46,0x4c,0x4f,0x41,0x54,0x20,0x72,0x65,0x73,0x75,0x6c,0x74,0x32,0x20,0x3d,0x20,0x74,0x65,0x6d,0x70,0x2e,0x7a,0x3b,0xa,0x20,0x20,0x20,0x20,0x46,0x4c,0x4f,0x41,0x54,0x20,0x72,0x65,0x73,0x75,0x6c,0x74,0x33,0x20,0x3d,0x20,0x74,0x65,0x6d,0x70,0x2e,0x77,0x3b,0xa,0x20,0x20,0x20,0x20,0x23,0x65,0x6c,0x73,0x65,0xa,0x20,0x20,0x20,0x20,0x46,0x4c,0x4f,0x41,0x54,0x20,0x72,0x65,0x73,0x75,0x6c,0x74,0x30,0x20,0x3d,0x20,0x30,0x3b,0xa,0x20,0x20,0x20,0x20,0x46,0x4c,0x4f,0x41,0x54,0x20,0x72,0x65,0x73,0x75,0x6c,0x74,0x31,0x20,0x3d,0x20,0x30,0x3b,0xa,0x20,0x20,0x20,0x20,0x46,0x4c,0x4f,0x41,0x54,0x20,0x72,0x65,0x73,0x75,0x6c,0x74,0x32,0x20,0x3d,0x20,0x30,0x3b,0xa,0x20,0x20,0x20,0x20,0x46,0x4c,0x4f,0x41,0x54,0x20,0x72,0x65,0x73,0x75,0x6c,0x74,0x33,0x20,0x3d,0x20,0x30,0x3b,0xa,0x20,0x20,0x20,0x20,0x23,0x65,0x6e,0x64,0x69,0x66,0xa,0xa,0x20,0x20,0x20,0x20,0x66,0x6f,0x72,0x20,0x28,0x73,0x68,0x6f,0x72,0x74,0x20,0x70,0x6f,0x73,0x20,0x3d,0x20,0x30,0x3b,0x20,0x70,0x6f,0x73,0x20,0x3c,0x20,0x63,0x68,0x61,0x6e,0x6e,0x65,0x6c,0x5f,0x62,0x6c,0x6f,0x63,0x6b,0x73,0x3b,0x20,0x70,0x6f,0x73,0x20,0x2b,0x3d,0x20,0x31,0x29,0x20,0x7b,0xa,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x61,0x20,0x3d,0x20,0x52,0x49,0x5f,0x46,0x28,0x69,0x6e,0x70,0x75,0x74,0x5f,0x61,0x2c,0x20,0x53,0x41,0x4d,0x50,0x4c,0x45,0x52,0x2c,0x20,0x28,0x69,0x6e,0x74,0x32,0x29,0x28,0x70,0x6f,0x73,0x2c,0x20,0x68,0x65,0x69,0x67,0x68,0x74,0x5f,0x69,0x64,0x78,0x29,0x29,0x3b,0xa,0xa,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x73,0x68,0x6f,0x72,0x74,0x20,0x72,0x65,0x6d,0x61,0x69,0x6e,0x20,0x3d,0x20,0x28,0x70,0x6f,0x73,0x20,0x2b,0x20,0x31,0x29,0x20,0x2a,0x20,0x34,0x20,0x2d,0x20,0x63,0x68,0x61,0x6e,0x6e,0x65,0x6c,0x73,0x3b,0xa,0xa,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x62,0x30,0x20,0x3d,0x20,0x52,0x49,0x5f,0x46,0x28,0x69,0x6e,0x70,0x75,0x74,0x5f,0x62,0x2c,0x20,0x53,0x41,0x4d,0x50,0x4c,0x45,0x52,0x2c,0x20,0x28,0x69,0x6e,0x74,0x32,0x29,0x28,0x70,0x6f,0x73,0x2c,0x20,0x77,0x69,0x64,0x74,0x68,0x5f,0x62,0x6c,0x6f,0x63,0x6b,0x73,0x5f,0x69,0x64,0x78,0x20,0x2a,0x20,0x34,0x29,0x29,0x3b,0xa,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x62,0x31,0x20,0x3d,0x20,0x52,0x49,0x5f,0x46,0x28,0x69,0x6e,0x70,0x75,0x74,0x5f,0x62,0x2c,0x20,0x53,0x41,0x4d,0x50,0x4c,0x45,0x52,0x2c,0x20,0x28,0x69,0x6e,0x74,0x32,0x29,0x28,0x70,0x6f,0x73,0x2c,0x20,0x77,0x69,0x64,0x74,0x68,0x5f,0x62,0x6c,0x6f,0x63,0x6b,0x73,0x5f,0x69,0x64,0x78,0x20,0x2a,0x20,0x34,0x20,0x2b,0x20,0x31,0x29,0x29,0x3b,0xa,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x62,0x32,0x20,0x3d,0x20,0x52,0x49,0x5f,0x46,0x28,0x69,0x6e,0x70,0x75,0x74,0x5f,0x62,0x2c,0x20,0x53,0x41,0x4d,0x50,0x4c,0x45,0x52,0x2c,0x20,0x28,0x69,0x6e,0x74,0x32,0x29,0x28,0x70,0x6f,0x73,0x2c,0x20,0x77,0x69,0x64,0x74,0x68,0x5f,0x62,0x6c,0x6f,0x63,0x6b,0x73,0x5f,0x69,0x64,0x78,0x20,0x2a,0x20,0x34,0x20,0x2b,0x20,0x32,0x29,0x29,0x3b,0xa,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x62,0x33,0x20,0x3d,0x20,0x52,0x49,0x5f,0x46,0x28,0x69,0x6e,0x70,0x75,0x74,0x5f,0x62,0x2c,0x20,0x53,0x41,0x4d,0x50,0x4c,0x45,0x52,0x2c,0x20,0x28,0x69,0x6e,0x74,0x32,0x29,0x28,0x70,0x6f,0x73,0x2c,0x20,0x77,0x69,0x64,0x74,0x68,0x5f,0x62,0x6c,0x6f,0x63,0x6b,0x73,0x5f,0x69,0x64,0x78,0x20,0x2a,0x20,0x34,0x20,0x2b,0x20,0x33,0x29,0x29,0x3b,0xa,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x69,0x66,0x20,0x28,0x72,0x65,0x6d,0x61,0x69,0x6e,0x20,0x3d,0x3d,0x20,0x33,0x29,0x20,0x7b,0xa,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x61,0x2e,0x79,0x20,0x3d,0x20,0x30,0x3b,0xa,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x61,0x2e,0x7a,0x20,0x3d,0x20,0x30,0x3b,0xa,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x61,0x2e,0x77,0x20,0x3d,0x20,0x30,0x3b,0xa,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x7d,0x20,0x65,0x6c,0x73,0x65,0x20,0x69,0x66,0x20,0x28,0x72,0x65,0x6d,0x61,0x69,0x6e,0x20,0x3d,0x3d,0x20,0x32,0x29,0x20,0x7b,0xa,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x61,0x2e,0x7a,0x20,0x3d,0x20,0x30,0x3b,0xa,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x61,0x2e,0x77,0x20,0x3d,0x20,0x30,0x3b,0xa,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x7d,0x20,0x65,0x6c,0x73,0x65,0x20,0x69,0x66,0x20,0x28,0x72,0x65,0x6d,0x61,0x69,0x6e,0x20,0x3d,0x3d,0x20,0x31,0x29,0x20,0x7b,0xa,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x61,0x2e,0x77,0x20,0x3d,0x20,0x30,0x3b,0xa,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x7d,0xa,0xa,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x72,0x65,0x73,0x75,0x6c,0x74,0x30,0x20,0x2b,0x3d,0x20,0x64,0x6f,0x74,0x28,0x61,0x2c,0x20,0x62,0x30,0x29,0x3b,0xa,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x72,0x65,0x73,0x75,0x6c,0x74,0x31,0x20,0x2b,0x3d,0x20,0x64,0x6f,0x74,0x28,0x61,0x2c,0x20,0x62,0x31,0x29,0x3b,0xa,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x72,0x65,0x73,0x75,0x6c,0x74,0x32,0x20,0x2b,0x3d,0x20,0x64,0x6f,0x74,0x28,0x61,0x2c,0x20,0x62,0x32,0x29,0x3b,0xa,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x72,0x65,0x73,0x75,0x6c,0x74,0x33,0x20,0x2b,0x3d,0x20,0x64,0x6f,0x74,0x28,0x61,0x2c,0x20,0x62,0x33,0x29,0x3b,0xa,0x20,0x20,0x20,0x20,0x7d,0xa,0x20,0x20,0x20,0x20,0x57,0x49,0x5f,0x46,0x28,0x6f,0x75,0x74,0x70,0x75,0x74,0x5f,0x63,0x2c,0x20,0x28,0x69,0x6e,0x74,0x32,0x29,0x28,0x77,0x69,0x64,0x74,0x68,0x5f,0x62,0x6c,0x6f,0x63,0x6b,0x73,0x5f,0x69,0x64,0x78,0x2c,0x20,0x68,0x65,0x69,0x67,0x68,0x74,0x5f,0x69,0x64,0x78,0x29,0x2c,0x20,0x28,0x46,0x4c,0x4f,0x41,0x54,0x34,0x29,0x28,0x72,0x65,0x73,0x75,0x6c,0x74,0x30,0x2c,0x20,0x72,0x65,0x73,0x75,0x6c,0x74,0x31,0x2c,0x20,0x72,0x65,0x73,0x75,0x6c,0x74,0x32,0x2c,0x20,0x72,0x65,0x73,0x75,0x6c,0x74,0x33,0x29,0x29,0x3b,0xa,0x7d,0xa,0xa,0x20,0x20,0x20,0x20,0x5f,0x5f,0x6b,0x65,0x72,0x6e,0x65,0x6c,0x20,0x76,0x6f,0x69,0x64,0x20,0x6d,0x61,0x74,0x6d,0x75,0x6c,0x5f,0x74,0x72,0x61,0x6e,0x73,0x41,0x28,0x47,0x4c,0x4f,0x42,0x41,0x4c,0x5f,0x53,0x49,0x5a,0x45,0x5f,0x32,0x5f,0x44,0x49,0x4d,0x53,0x20,0x5f,0x5f,0x72,0x65,0x61,0x64,0x5f,0x6f,0x6e,0x6c,0x79,0x20,0x69,0x6d,0x61,0x67,0x65,0x32,0x64,0x5f,0x74,0x20,0x69,0x6e,0x70,0x75,0x74,0x5f,0x61,0x2c,0xa,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x5f,0x5f,0x72,0x65,0x61,0x64,0x5f,0x6f,0x6e,0x6c,0x79,0x20,0x69,0x6d,0x61,0x67,0x65,0x32,0x64,0x5f,0x74,0x20,0x69,0x6e,0x70,0x75,0x74,0x5f,0x62,0x2c,0xa,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x23,0x69,0x66,0x64,0x65,0x66,0x20,0x42,0x49,0x41,0x53,0xa,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x5f,0x5f,0x72,0x65,0x61,0x64,0x5f,0x6f,0x6e,0x6c,0x79,0x20,0x69,0x6d,0x61,0x67,0x65,0x32,0x64,0x5f,0x74,0x20,0x69,0x6e,0x70,0x75,0x74,0x5f,0x63,0x2c,0xa,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x23,0x65,0x6e,0x64,0x69,0x66,0xa,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x5f,0x5f,0x77,0x72,0x69,0x74,0x65,0x5f,0x6f,0x6e,0x6c,0x79,0x20,0x69,0x6d,0x61,0x67,0x65,0x32,0x64,0x5f,0x74,0x20,0x6f,0x75,0x74,0x70,0x75,0x74,0x5f,0x63,0x2c,0xa,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x5f,0x5f,0x70,0x72,0x69,0x76,0x61,0x74,0x65,0x20,0x63,0x6f,0x6e,0x73,0x74,0x20,0x69,0x6e,0x74,0x20,0x63,0x68,0x61,0x6e,0x6e,0x65,0x6c,0x73,0x2c,0xa,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x5f,0x5f,0x70,0x72,0x69,0x76,0x61,0x74,0x65,0x20,0x63,0x6f,0x6e,0x73,0x74,0x20,0x69,0x6e,0x74,0x20,0x63,0x68,0x61,0x6e,0x6e,0x65,0x6c,0x5f,0x62,0x6c,0x6f,0x63,0x6b,0x73,0x2c,0xa,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x5f,0x5f,0x70,0x72,0x69,0x76,0x61,0x74,0x65,0x20,0x63,0x6f,0x6e,0x73,0x74,0x20,0x69,0x6e,0x74,0x20,0x68,0x65,0x69,0x67,0x68,0x74,0x29,0x20,0x7b,0xa,0x20,0x20,0x20,0x20,0x63,0x6f,0x6e,0x73,0x74,0x20,0x69,0x6e,0x74,0x20,0x77,0x69,0x64,0x74,0x68,0x5f,0x62,0x6c,0x6f,0x63,0x6b,0x73,0x5f,0x69,0x64,0x78,0x20,0x3d,0x20,0x67,0x65,0x74,0x5f,0x67,0x6c,0x6f,0x62,0x61,0x6c,0x5f,0x69,0x64,0x28,0x30,0x29,0x3b,0xa,0x20,0x20,0x20,0x20,0x63,0x6f,0x6e,0x73,0x74,0x20,0x69,0x6e,0x74,0x20,0x68,0x65,0x69,0x67,0x68,0x74,0x5f,0x62,0x6c,0x6f,0x63,0x6b,0x73,0x5f,0x69,0x64,0x78,0x20,0x3d,0x20,0x67,0x65,0x74,0x5f,0x67,0x6c,0x6f,0x62,0x61,0x6c,0x5f,0x69,0x64,0x28,0x31,0x29,0x3b,0xa,0xa,0x20,0x20,0x20,0x20,0x44,0x45,0x41,0x4c,0x5f,0x4e,0x4f,0x4e,0x5f,0x55,0x4e,0x49,0x46,0x4f,0x52,0x4d,0x5f,0x44,0x49,0x4d,0x32,0x28,0x77,0x69,0x64,0x74,0x68,0x5f,0x62,0x6c,0x6f,0x63,0x6b,0x73,0x5f,0x69,0x64,0x78,0x2c,0x20,0x68,0x65,0x69,0x67,0x68,0x74,0x5f,0x62,0x6c,0x6f,0x63,0x6b,0x73,0x5f,0x69,0x64,0x78,0x29,0x3b,0xa,0xa,0x20,0x20,0x20,0x20,0x46,0x4c,0x4f,0x41,0x54,0x34,0x20,0x76,0x5f,0x7a,0x65,0x72,0x6f,0x20,0x3d,0x20,0x28,0x46,0x4c,0x4f,0x41,0x54,0x34,0x29,0x28,0x28,0x46,0x4c,0x4f,0x41,0x54,0x29,0x30,0x2e,0x30,0x29,0x3b,0xa,0x20,0x20,0x20,0x20,0x23,0x69,0x66,0x64,0x65,0x66,0x20,0x42,0x49,0x41,0x53,0xa,0x20,0x20,0x20,0x20,0x46,0x4c,0x4f,0x41,0x54,0x34,0x20,0x72,0x65,0x73,0x75,0x6c,0x74,0x30,0x20,0x3d,0x20,0x52,0x49,0x5f,0x46,0x28,0x69,0x6e,0x70,0x75,0x74,0x5f,0x63,0x2c,0x20,0x53,0x41,0x4d,0x50,0x4c,0x45,0x52,0x2c,0x20,0x28,0x69,0x6e,0x74,0x32,0x29,0x28,0x77,0x69,0x64,0x74,0x68,0x5f,0x62,0x6c,0x6f,0x63,0x6b,0x73,0x5f,0x69,0x64,0x78,0x2c,0x20,0x30,0x29,0x29,0x3b,0xa,0x20,0x20,0x20,0x20,0x46,0x4c,0x4f,0x41,0x54,0x34,0x20,0x72,0x65,0x73,0x75,0x6c,0x74,0x31,0x20,0x3d,0x20,0x72,0x65,0x73,0x75,0x6c,0x74,0x30,0x3b,0xa,0x20,0x20,0x20,0x20,0x46,0x4c,0x4f,0x41,0x54,0x34,0x20,0x72,0x65,0x73,0x75,0x6c,0x74,0x32,0x20,0x3d,0x20,0x72,0x65,0x73,0x75,0x6c,0x74,0x30,0x3b,0xa,0x20,0x20,0x20,0x20,0x46,0x4c,0x4f,0x41,0x54,0x34,0x20,0x72,0x65,0x73,0x75,0x6c,0x74,0x33,0x20,0x3d,0x20,0x72,0x65,0x73,0x75,0x6c,0x74,0x30,0x3b,0xa,0x20,0x20,0x20,0x20,0x23,0x65,0x6c,0x73,0x65,0xa,0x20,0x20,0x20,0x20,0x46,0x4c,0x4f,0x41,0x54,0x34,0x20,0x72,0x65,0x73,0x75,0x6c,0x74,0x30,0x20,0x3d,0x20,0x30,0x3b,0xa,0x20,0x20,0x20,0x20,0x46,0x4c,0x4f,0x41,0x54,0x34,0x20,0x72,0x65,0x73,0x75,0x6c,0x74,0x31,0x20,0x3d,0x20,0x30,0x3b,0xa,0x20,0x20,0x20,0x20,0x46,0x4c,0x4f,0x41,0x54,0x34,0x20,0x72,0x65,0x73,0x75,0x6c,0x74,0x32,0x20,0x3d,0x20,0x30,0x3b,0xa,0x20,0x20,0x20,0x20,0x46,0x4c,0x4f,0x41,0x54,0x34,0x20,0x72,0x65,0x73,0x75,0x6c,0x74,0x33,0x20,0x3d,0x20,0x30,0x3b,0xa,0x20,0x20,0x20,0x20,0x23,0x65,0x6e,0x64,0x69,0x66,0xa,0x20,0x20,0x20,0x20,0xa,0x20,0x20,0x20,0x20,0x66,0x6f,0x72,0x20,0x28,0x73,0x68,0x6f,0x72,0x74,0x20,0x70,0x6f,0x73,0x20,0x3d,0x20,0x30,0x3b,0x20,0x70,0x6f,0x73,0x20,0x3c,0x20,0x63,0x68,0x61,0x6e,0x6e,0x65,0x6c,0x5f,0x62,0x6c,0x6f,0x63,0x6b,0x73,0x3b,0x20,0x70,0x6f,0x73,0x20,0x2b,0x3d,0x20,0x31,0x29,0x20,0x7b,0xa,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x46,0x4c,0x4f,0x41,0x54,0x34,0x20,0x61,0x30,0x20,0x3d,0x20,0x52,0x49,0x5f,0x46,0x28,0x69,0x6e,0x70,0x75,0x74,0x5f,0x61,0x2c,0x20,0x53,0x41,0x4d,0x50,0x4c,0x45,0x52,0x2c,0x20,0x28,0x69,0x6e,0x74,0x32,0x29,0x28,0x68,0x65,0x69,0x67,0x68,0x74,0x5f,0x62,0x6c,0x6f,0x63,0x6b,0x73,0x5f,0x69,0x64,0x78,0x2c,0x20,0x34,0x2a,0x70,0x6f,0x73,0x29,0x29,0x3b,0xa,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x46,0x4c,0x4f,0x41,0x54,0x34,0x20,0x61,0x31,0x20,0x3d,0x20,0x52,0x49,0x5f,0x46,0x28,0x69,0x6e,0x70,0x75,0x74,0x5f,0x61,0x2c,0x20,0x53,0x41,0x4d,0x50,0x4c,0x45,0x52,0x2c,0x20,0x28,0x69,0x6e,0x74,0x32,0x29,0x28,0x68,0x65,0x69,0x67,0x68,0x74,0x5f,0x62,0x6c,0x6f,0x63,0x6b,0x73,0x5f,0x69,0x64,0x78,0x2c,0x20,0x34,0x2a,0x70,0x6f,0x73,0x2b,0x31,0x29,0x29,0x3b,0xa,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x46,0x4c,0x4f,0x41,0x54,0x34,0x20,0x61,0x32,0x20,0x3d,0x20,0x52,0x49,0x5f,0x46,0x28,0x69,0x6e,0x70,0x75,0x74,0x5f,0x61,0x2c,0x20,0x53,0x41,0x4d,0x50,0x4c,0x45,0x52,0x2c,0x20,0x28,0x69,0x6e,0x74,0x32,0x29,0x28,0x68,0x65,0x69,0x67,0x68,0x74,0x5f,0x62,0x6c,0x6f,0x63,0x6b,0x73,0x5f,0x69,0x64,0x78,0x2c,0x20,0x34,0x2a,0x70,0x6f,0x73,0x2b,0x32,0x29,0x29,0x3b,0xa,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x46,0x4c,0x4f,0x41,0x54,0x34,0x20,0x61,0x33,0x20,0x3d,0x20,0x52,0x49,0x5f,0x46,0x28,0x69,0x6e,0x70,0x75,0x74,0x5f,0x61,0x2c,0x20,0x53,0x41,0x4d,0x50,0x4c,0x45,0x52,0x2c,0x20,0x28,0x69,0x6e,0x74,0x32,0x29,0x28,0x68,0x65,0x69,0x67,0x68,0x74,0x5f,0x62,0x6c,0x6f,0x63,0x6b,0x73,0x5f,0x69,0x64,0x78,0x2c,0x20,0x34,0x2a,0x70,0x6f,0x73,0x2b,0x33,0x29,0x29,0x3b,0xa,0xa,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x46,0x4c,0x4f,0x41,0x54,0x34,0x20,0x62,0x30,0x20,0x3d,0x20,0x52,0x49,0x5f,0x46,0x28,0x69,0x6e,0x70,0x75,0x74,0x5f,0x62,0x2c,0x20,0x53,0x41,0x4d,0x50,0x4c,0x45,0x52,0x2c,0x20,0x28,0x69,0x6e,0x74,0x32,0x29,0x28,0x77,0x69,0x64,0x74,0x68,0x5f,0x62,0x6c,0x6f,0x63,0x6b,0x73,0x5f,0x69,0x64,0x78,0x2c,0x20,0x34,0x2a,0x70,0x6f,0x73,0x29,0x29,0x3b,0xa,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x46,0x4c,0x4f,0x41,0x54,0x34,0x20,0x62,0x31,0x20,0x3d,0x20,0x52,0x49,0x5f,0x46,0x28,0x69,0x6e,0x70,0x75,0x74,0x5f,0x62,0x2c,0x20,0x53,0x41,0x4d,0x50,0x4c,0x45,0x52,0x2c,0x20,0x28,0x69,0x6e,0x74,0x32,0x29,0x28,0x77,0x69,0x64,0x74,0x68,0x5f,0x62,0x6c,0x6f,0x63,0x6b,0x73,0x5f,0x69,0x64,0x78,0x2c,0x20,0x34,0x2a,0x70,0x6f,0x73,0x2b,0x31,0x29,0x29,0x3b,0xa,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x46,0x4c,0x4f,0x41,0x54,0x34,0x20,0x62,0x32,0x20,0x3d,0x20,0x52,0x49,0x5f,0x46,0x28,0x69,0x6e,0x70,0x75,0x74,0x5f,0x62,0x2c,0x20,0x53,0x41,0x4d,0x50,0x4c,0x45,0x52,0x2c,0x20,0x28,0x69,0x6e,0x74,0x32,0x29,0x28,0x77,0x69,0x64,0x74,0x68,0x5f,0x62,0x6c,0x6f,0x63,0x6b,0x73,0x5f,0x69,0x64,0x78,0x2c,0x20,0x34,0x2a,0x70,0x6f,0x73,0x2b,0x32,0x29,0x29,0x3b,0xa,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x46,0x4c,0x4f,0x41,0x54,0x34,0x20,0x62,0x33,0x20,0x3d,0x20,0x52,0x49,0x5f,0x46,0x28,0x69,0x6e,0x70,0x75,0x74,0x5f,0x62,0x2c,0x20,0x53,0x41,0x4d,0x50,0x4c,0x45,0x52,0x2c,0x20,0x28,0x69,0x6e,0x74,0x32,0x29,0x28,0x77,0x69,0x64,0x74,0x68,0x5f,0x62,0x6c,0x6f,0x63,0x6b,0x73,0x5f,0x69,0x64,0x78,0x2c,0x20,0x34,0x2a,0x70,0x6f,0x73,0x2b,0x33,0x29,0x29,0x3b,0xa,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0xa,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x73,0x68,0x6f,0x72,0x74,0x20,0x72,0x65,0x6d,0x61,0x69,0x6e,0x20,0x3d,0x20,0x28,0x70,0x6f,0x73,0x20,0x2b,0x20,0x31,0x29,0x20,0x2a,0x20,0x34,0x20,0x2d,0x20,0x63,0x68,0x61,0x6e,0x6e,0x65,0x6c,0x73,0x3b,0xa,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x61,0x33,0x20,0x3d,0x20,0x28,0x28,0x72,0x65,0x6d,0x61,0x69,0x6e,0x20,0x3e,0x3d,0x20,0x31,0x29,0x20,0x3f,0x20,0x76,0x5f,0x7a,0x65,0x72,0x6f,0x20,0x3a,0x20,0x61,0x33,0x29,0x3b,0xa,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x61,0x32,0x20,0x3d,0x20,0x28,0x28,0x72,0x65,0x6d,0x61,0x69,0x6e,0x20,0x3e,0x3d,0x20,0x32,0x29,0x20,0x3f,0x20,0x76,0x5f,0x7a,0x65,0x72,0x6f,0x20,0x3a,0x20,0x61,0x32,0x29,0x3b,0xa,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x61,0x31,0x20,0x3d,0x20,0x28,0x28,0x72,0x65,0x6d,0x61,0x69,0x6e,0x20,0x3e,0x3d,0x20,0x33,0x29,0x20,0x3f,0x20,0x76,0x5f,0x7a,0x65,0x72,0x6f,0x20,0x3a,0x20,0x61,0x31,0x29,0x3b,0xa,0xa,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x46,0x4c,0x4f,0x41,0x54,0x34,0x20,0x61,0x30,0x5f,0x74,0x72,0x61,0x6e,0x73,0x20,0x3d,0x20,0x28,0x46,0x4c,0x4f,0x41,0x54,0x34,0x29,0x28,0x61,0x30,0x2e,0x78,0x2c,0x20,0x61,0x31,0x2e,0x78,0x2c,0x20,0x61,0x32,0x2e,0x78,0x2c,0x20,0x61,0x33,0x2e,0x78,0x29,0x3b,0xa,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x46,0x4c,0x4f,0x41,0x54,0x34,0x20,0x61,0x31,0x5f,0x74,0x72,0x61,0x6e,0x73,0x20,0x3d,0x20,0x28,0x46,0x4c,0x4f,0x41,0x54,0x34,0x29,0x28,0x61,0x30,0x2e,0x79,0x2c,0x20,0x61,0x31,0x2e,0x79,0x2c,0x20,0x61,0x32,0x2e,0x79,0x2c,0x20,0x61,0x33,0x2e,0x79,0x29,0x3b,0xa,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x46,0x4c,0x4f,0x41,0x54,0x34,0x20,0x61,0x32,0x5f,0x74,0x72,0x61,0x6e,0x73,0x20,0x3d,0x20,0x28,0x46,0x4c,0x4f,0x41,0x54,0x34,0x29,0x28,0x61,0x30,0x2e,0x7a,0x2c,0x20,0x61,0x31,0x2e,0x7a,0x2c,0x20,0x61,0x32,0x2e,0x7a,0x2c,0x20,0x61,0x33,0x2e,0x7a,0x29,0x3b,0xa,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x46,0x4c,0x4f,0x41,0x54,0x34,0x20,0x61,0x33,0x5f,0x74,0x72,0x61,0x6e,0x73,0x20,0x3d,0x20,0x28,0x46,0x4c,0x4f,0x41,0x54,0x34,0x29,0x28,0x61,0x30,0x2e,0x77,0x2c,0x20,0x61,0x31,0x2e,0x77,0x2c,0x20,0x61,0x32,0x2e,0x77,0x2c,0x20,0x61,0x33,0x2e,0x77,0x29,0x3b,0xa,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0xa,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x46,0x4c,0x4f,0x41,0x54,0x34,0x20,0x62,0x30,0x5f,0x74,0x72,0x61,0x6e,0x73,0x20,0x3d,0x20,0x28,0x46,0x4c,0x4f,0x41,0x54,0x34,0x29,0x28,0x62,0x30,0x2e,0x78,0x2c,0x20,0x62,0x31,0x2e,0x78,0x2c,0x20,0x62,0x32,0x2e,0x78,0x2c,0x20,0x62,0x33,0x2e,0x78,0x29,0x3b,0xa,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x46,0x4c,0x4f,0x41,0x54,0x34,0x20,0x62,0x31,0x5f,0x74,0x72,0x61,0x6e,0x73,0x20,0x3d,0x20,0x28,0x46,0x4c,0x4f,0x41,0x54,0x34,0x29,0x28,0x62,0x30,0x2e,0x79,0x2c,0x20,0x62,0x31,0x2e,0x79,0x2c,0x20,0x62,0x32,0x2e,0x79,0x2c,0x20,0x62,0x33,0x2e,0x79,0x29,0x3b,0xa,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x46,0x4c,0x4f,0x41,0x54,0x34,0x20,0x62,0x32,0x5f,0x74,0x72,0x61,0x6e,0x73,0x20,0x3d,0x20,0x28,0x46,0x4c,0x4f,0x41,0x54,0x34,0x29,0x28,0x62,0x30,0x2e,0x7a,0x2c,0x20,0x62,0x31,0x2e,0x7a,0x2c,0x20,0x62,0x32,0x2e,0x7a,0x2c,0x20,0x62,0x33,0x2e,0x7a,0x29,0x3b,0xa,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x46,0x4c,0x4f,0x41,0x54,0x34,0x20,0x62,0x33,0x5f,0x74,0x72,0x61,0x6e,0x73,0x20,0x3d,0x20,0x28,0x46,0x4c,0x4f,0x41,0x54,0x34,0x29,0x28,0x62,0x30,0x2e,0x77,0x2c,0x20,0x62,0x31,0x2e,0x77,0x2c,0x20,0x62,0x32,0x2e,0x77,0x2c,0x20,0x62,0x33,0x2e,0x77,0x29,0x3b,0xa,0xa,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x2f,0x2f,0x6d,0x61,0x74,0x6d,0x75,0x6c,0xa,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x72,0x65,0x73,0x75,0x6c,0x74,0x30,0x2e,0x78,0x20,0x2b,0x3d,0x20,0x64,0x6f,0x74,0x28,0x61,0x30,0x5f,0x74,0x72,0x61,0x6e,0x73,0x2c,0x20,0x62,0x30,0x5f,0x74,0x72,0x61,0x6e,0x73,0x29,0x3b,0xa,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x72,0x65,0x73,0x75,0x6c,0x74,0x30,0x2e,0x79,0x20,0x2b,0x3d,0x20,0x64,0x6f,0x74,0x28,0x61,0x30,0x5f,0x74,0x72,0x61,0x6e,0x73,0x2c,0x20,0x62,0x31,0x5f,0x74,0x72,0x61,0x6e,0x73,0x29,0x3b,0xa,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x72,0x65,0x73,0x75,0x6c,0x74,0x30,0x2e,0x7a,0x20,0x2b,0x3d,0x20,0x64,0x6f,0x74,0x28,0x61,0x30,0x5f,0x74,0x72,0x61,0x6e,0x73,0x2c,0x20,0x62,0x32,0x5f,0x74,0x72,0x61,0x6e,0x73,0x29,0x3b,0xa,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x72,0x65,0x73,0x75,0x6c,0x74,0x30,0x2e,0x77,0x20,0x2b,0x3d,0x20,0x64,0x6f,0x74,0x28,0x61,0x30,0x5f,0x74,0x72,0x61,0x6e,0x73,0x2c,0x20,0x62,0x33,0x5f,0x74,0x72,0x61,0x6e,0x73,0x29,0x3b,0xa,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0xa,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x72,0x65,0x73,0x75,0x6c,0x74,0x31,0x2e,0x78,0x20,0x2b,0x3d,0x20,0x64,0x6f,0x74,0x28,0x61,0x31,0x5f,0x74,0x72,0x61,0x6e,0x73,0x2c,0x20,0x62,0x30,0x5f,0x74,0x72,0x61,0x6e,0x73,0x29,0x3b,0xa,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x72,0x65,0x73,0x75,0x6c,0x74,0x31,0x2e,0x79,0x20,0x2b,0x3d,0x20,0x64,0x6f,0x74,0x28,0x61,0x31,0x5f,0x74,0x72,0x61,0x6e,0x73,0x2c,0x20,0x62,0x31,0x5f,0x74,0x72,0x61,0x6e,0x73,0x29,0x3b,0xa,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x72,0x65,0x73,0x75,0x6c,0x74,0x31,0x2e,0x7a,0x20,0x2b,0x3d,0x20,0x64,0x6f,0x74,0x28,0x61,0x31,0x5f,0x74,0x72,0x61,0x6e,0x73,0x2c,0x20,0x62,0x32,0x5f,0x74,0x72,0x61,0x6e,0x73,0x29,0x3b,0xa,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x72,0x65,0x73,0x75,0x6c,0x74,0x31,0x2e,0x77,0x20,0x2b,0x3d,0x20,0x64,0x6f,0x74,0x28,0x61,0x31,0x5f,0x74,0x72,0x61,0x6e,0x73,0x2c,0x20,0x62,0x33,0x5f,0x74,0x72,0x61,0x6e,0x73,0x29,0x3b,0xa,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0xa,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x72,0x65,0x73,0x75,0x6c,0x74,0x32,0x2e,0x78,0x20,0x2b,0x3d,0x20,0x64,0x6f,0x74,0x28,0x61,0x32,0x5f,0x74,0x72,0x61,0x6e,0x73,0x2c,0x20,0x62,0x30,0x5f,0x74,0x72,0x61,0x6e,0x73,0x29,0x3b,0xa,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x72,0x65,0x73,0x75,0x6c,0x74,0x32,0x2e,0x79,0x20,0x2b,0x3d,0x20,0x64,0x6f,0x74,0x28,0x61,0x32,0x5f,0x74,0x72,0x61,0x6e,0x73,0x2c,0x20,0x62,0x31,0x5f,0x74,0x72,0x61,0x6e,0x73,0x29,0x3b,0xa,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x72,0x65,0x73,0x75,0x6c,0x74,0x32,0x2e,0x7a,0x20,0x2b,0x3d,0x20,0x64,0x6f,0x74,0x28,0x61,0x32,0x5f,0x74,0x72,0x61,0x6e,0x73,0x2c,0x20,0x62,0x32,0x5f,0x74,0x72,0x61,0x6e,0x73,0x29,0x3b,0xa,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x72,0x65,0x73,0x75,0x6c,0x74,0x32,0x2e,0x77,0x20,0x2b,0x3d,0x20,0x64,0x6f,0x74,0x28,0x61,0x32,0x5f,0x74,0x72,0x61,0x6e,0x73,0x2c,0x20,0x62,0x33,0x5f,0x74,0x72,0x61,0x6e,0x73,0x29,0x3b,0xa,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0xa,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x72,0x65,0x73,0x75,0x6c,0x74,0x33,0x2e,0x78,0x20,0x2b,0x3d,0x20,0x64,0x6f,0x74,0x28,0x61,0x33,0x5f,0x74,0x72,0x61,0x6e,0x73,0x2c,0x20,0x62,0x30,0x5f,0x74,0x72,0x61,0x6e,0x73,0x29,0x3b,0xa,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x72,0x65,0x73,0x75,0x6c,0x74,0x33,0x2e,0x79,0x20,0x2b,0x3d,0x20,0x64,0x6f,0x74,0x28,0x61,0x33,0x5f,0x74,0x72,0x61,0x6e,0x73,0x2c,0x20,0x62,0x31,0x5f,0x74,0x72,0x61,0x6e,0x73,0x29,0x3b,0xa,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x72,0x65,0x73,0x75,0x6c,0x74,0x33,0x2e,0x7a,0x20,0x2b,0x3d,0x20,0x64,0x6f,0x74,0x28,0x61,0x33,0x5f,0x74,0x72,0x61,0x6e,0x73,0x2c,0x20,0x62,0x32,0x5f,0x74,0x72,0x61,0x6e,0x73,0x29,0x3b,0xa,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x72,0x65,0x73,0x75,0x6c,0x74,0x33,0x2e,0x77,0x20,0x2b,0x3d,0x20,0x64,0x6f,0x74,0x28,0x61,0x33,0x5f,0x74,0x72,0x61,0x6e,0x73,0x2c,0x20,0x62,0x33,0x5f,0x74,0x72,0x61,0x6e,0x73,0x29,0x3b,0xa,0x20,0x20,0x20,0x20,0x7d,0xa,0x20,0x20,0x20,0x20,0x57,0x49,0x5f,0x46,0x28,0x6f,0x75,0x74,0x70,0x75,0x74,0x5f,0x63,0x2c,0x20,0x28,0x69,0x6e,0x74,0x32,0x29,0x28,0x77,0x69,0x64,0x74,0x68,0x5f,0x62,0x6c,0x6f,0x63,0x6b,0x73,0x5f,0x69,0x64,0x78,0x2c,0x20,0x34,0x2a,0x68,0x65,0x69,0x67,0x68,0x74,0x5f,0x62,0x6c,0x6f,0x63,0x6b,0x73,0x5f,0x69,0x64,0x78,0x29,0x2c,0x20,0x72,0x65,0x73,0x75,0x6c,0x74,0x30,0x29,0x3b,0xa,0x20,0x20,0x20,0x20,0x69,0x66,0x28,0x34,0x2a,0x68,0x65,0x69,0x67,0x68,0x74,0x5f,0x62,0x6c,0x6f,0x63,0x6b,0x73,0x5f,0x69,0x64,0x78,0x2b,0x31,0x20,0x3e,0x3d,0x20,0x68,0x65,0x69,0x67,0x68,0x74,0x29,0x20,0x72,0x65,0x74,0x75,0x72,0x6e,0x3b,0xa,0x20,0x20,0x20,0x20,0x57,0x49,0x5f,0x46,0x28,0x6f,0x75,0x74,0x70,0x75,0x74,0x5f,0x63,0x2c,0x20,0x28,0x69,0x6e,0x74,0x32,0x29,0x28,0x77,0x69,0x64,0x74,0x68,0x5f,0x62,0x6c,0x6f,0x63,0x6b,0x73,0x5f,0x69,0x64,0x78,0x2c,0x20,0x34,0x2a,0x68,0x65,0x69,0x67,0x68,0x74,0x5f,0x62,0x6c,0x6f,0x63,0x6b,0x73,0x5f,0x69,0x64,0x78,0x2b,0x31,0x29,0x2c,0x20,0x72,0x65,0x73,0x75,0x6c,0x74,0x31,0x29,0x3b,0xa,0x20,0x20,0x20,0x20,0x69,0x66,0x28,0x34,0x2a,0x68,0x65,0x69,0x67,0x68,0x74,0x5f,0x62,0x6c,0x6f,0x63,0x6b,0x73,0x5f,0x69,0x64,0x78,0x2b,0x32,0x20,0x3e,0x3d,0x20,0x68,0x65,0x69,0x67,0x68,0x74,0x29,0x20,0x72,0x65,0x74,0x75,0x72,0x6e,0x3b,0xa,0x20,0x20,0x20,0x20,0x57,0x49,0x5f,0x46,0x28,0x6f,0x75,0x74,0x70,0x75,0x74,0x5f,0x63,0x2c,0x20,0x28,0x69,0x6e,0x74,0x32,0x29,0x28,0x77,0x69,0x64,0x74,0x68,0x5f,0x62,0x6c,0x6f,0x63,0x6b,0x73,0x5f,0x69,0x64,0x78,0x2c,0x20,0x34,0x2a,0x68,0x65,0x69,0x67,0x68,0x74,0x5f,0x62,0x6c,0x6f,0x63,0x6b,0x73,0x5f,0x69,0x64,0x78,0x2b,0x32,0x29,0x2c,0x20,0x72,0x65,0x73,0x75,0x6c,0x74,0x32,0x29,0x3b,0xa,0x20,0x20,0x20,0x20,0x69,0x66,0x28,0x34,0x2a,0x68,0x65,0x69,0x67,0x68,0x74,0x5f,0x62,0x6c,0x6f,0x63,0x6b,0x73,0x5f,0x69,0x64,0x78,0x2b,0x33,0x20,0x3e,0x3d,0x20,0x68,0x65,0x69,0x67,0x68,0x74,0x29,0x20,0x72,0x65,0x74,0x75,0x72,0x6e,0x3b,0xa,0x20,0x20,0x20,0x20,0x57,0x49,0x5f,0x46,0x28,0x6f,0x75,0x74,0x70,0x75,0x74,0x5f,0x63,0x2c,0x20,0x28,0x69,0x6e,0x74,0x32,0x29,0x28,0x77,0x69,0x64,0x74,0x68,0x5f,0x62,0x6c,0x6f,0x63,0x6b,0x73,0x5f,0x69,0x64,0x78,0x2c,0x20,0x34,0x2a,0x68,0x65,0x69,0x67,0x68,0x74,0x5f,0x62,0x6c,0x6f,0x63,0x6b,0x73,0x5f,0x69,0x64,0x78,0x2b,0x33,0x29,0x2c,0x20,0x72,0x65,0x73,0x75,0x6c,0x74,0x33,0x29,0x3b,0xa,0xa,0x7d,0xa,0xa,0x5f,0x5f,0x6b,0x65,0x72,0x6e,0x65,0x6c,0x20,0x76,0x6f,0x69,0x64,0x20,0x6d,0x61,0x74,0x6d,0x75,0x6c,0x5f,0x74,0x72,0x61,0x6e,0x73,0x41,0x5f,0x74,0x72,0x61,0x6e,0x73,0x42,0x28,0x47,0x4c,0x4f,0x42,0x41,0x4c,0x5f,0x53,0x49,0x5a,0x45,0x5f,0x32,0x5f,0x44,0x49,0x4d,0x53,0x20,0x5f,0x5f,0x72,0x65,0x61,0x64,0x5f,0x6f,0x6e,0x6c,0x79,0x20,0x69,0x6d,0x61,0x67,0x65,0x32,0x64,0x5f,0x74,0x20,0x69,0x6e,0x70,0x75,0x74,0x5f,0x61,0x2c,0xa,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x5f,0x5f,0x72,0x65,0x61,0x64,0x5f,0x6f,0x6e,0x6c,0x79,0x20,0x69,0x6d,0x61,0x67,0x65,0x32,0x64,0x5f,0x74,0x20,0x69,0x6e,0x70,0x75,0x74,0x5f,0x62,0x2c,0xa,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x23,0x69,0x66,0x64,0x65,0x66,0x20,0x42,0x49,0x41,0x53,0xa,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x5f,0x5f,0x72,0x65,0x61,0x64,0x5f,0x6f,0x6e,0x6c,0x79,0x20,0x69,0x6d,0x61,0x67,0x65,0x32,0x64,0x5f,0x74,0x20,0x69,0x6e,0x70,0x75,0x74,0x5f,0x63,0x2c,0xa,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x23,0x65,0x6e,0x64,0x69,0x66,0xa,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x5f,0x5f,0x77,0x72,0x69,0x74,0x65,0x5f,0x6f,0x6e,0x6c,0x79,0x20,0x69,0x6d,0x61,0x67,0x65,0x32,0x64,0x5f,0x74,0x20,0x6f,0x75,0x74,0x70,0x75,0x74,0x5f,0x63,0x2c,0xa,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x5f,0x5f,0x70,0x72,0x69,0x76,0x61,0x74,0x65,0x20,0x63,0x6f,0x6e,0x73,0x74,0x20,0x69,0x6e,0x74,0x20,0x63,0x68,0x61,0x6e,0x6e,0x65,0x6c,0x73,0x2c,0xa,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x5f,0x5f,0x70,0x72,0x69,0x76,0x61,0x74,0x65,0x20,0x63,0x6f,0x6e,0x73,0x74,0x20,0x69,0x6e,0x74,0x20,0x63,0x68,0x61,0x6e,0x6e,0x65,0x6c,0x5f,0x62,0x6c,0x6f,0x63,0x6b,0x73,0x2c,0xa,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x5f,0x5f,0x70,0x72,0x69,0x76,0x61,0x74,0x65,0x20,0x63,0x6f,0x6e,0x73,0x74,0x20,0x69,0x6e,0x74,0x20,0x68,0x65,0x69,0x67,0x68,0x74,0x29,0x20,0x7b,0xa,0x20,0x20,0x20,0x20,0x63,0x6f,0x6e,0x73,0x74,0x20,0x69,0x6e,0x74,0x20,0x77,0x69,0x64,0x74,0x68,0x5f,0x62,0x6c,0x6f,0x63,0x6b,0x73,0x5f,0x69,0x64,0x78,0x20,0x3d,0x20,0x67,0x65,0x74,0x5f,0x67,0x6c,0x6f,0x62,0x61,0x6c,0x5f,0x69,0x64,0x28,0x30,0x29,0x3b,0xa,0x20,0x20,0x20,0x20,0x63,0x6f,0x6e,0x73,0x74,0x20,0x69,0x6e,0x74,0x20,0x68,0x65,0x69,0x67,0x68,0x74,0x5f,0x62,0x6c,0x6f,0x63,0x6b,0x73,0x5f,0x69,0x64,0x78,0x20,0x3d,0x20,0x67,0x65,0x74,0x5f,0x67,0x6c,0x6f,0x62,0x61,0x6c,0x5f,0x69,0x64,0x28,0x31,0x29,0x3b,0xa,0xa,0x20,0x20,0x20,0x20,0x44,0x45,0x41,0x4c,0x5f,0x4e,0x4f,0x4e,0x5f,0x55,0x4e,0x49,0x46,0x4f,0x52,0x4d,0x5f,0x44,0x49,0x4d,0x32,0x28,0x77,0x69,0x64,0x74,0x68,0x5f,0x62,0x6c,0x6f,0x63,0x6b,0x73,0x5f,0x69,0x64,0x78,0x2c,0x20,0x68,0x65,0x69,0x67,0x68,0x74,0x5f,0x62,0x6c,0x6f,0x63,0x6b,0x73,0x5f,0x69,0x64,0x78,0x29,0x3b,0xa,0xa,0x20,0x20,0x20,0x20,0x46,0x4c,0x4f,0x41,0x54,0x34,0x20,0x76,0x5f,0x7a,0x65,0x72,0x6f,0x20,0x3d,0x20,0x28,0x46,0x4c,0x4f,0x41,0x54,0x34,0x29,0x28,0x28,0x46,0x4c,0x4f,0x41,0x54,0x29,0x30,0x2e,0x30,0x29,0x3b,0xa,0x20,0x20,0x20,0x20,0x23,0x69,0x66,0x64,0x65,0x66,0x20,0x42,0x49,0x41,0x53,0xa,0x20,0x20,0x20,0x20,0x46,0x4c,0x4f,0x41,0x54,0x34,0x20,0x72,0x65,0x73,0x75,0x6c,0x74,0x30,0x20,0x3d,0x20,0x52,0x49,0x5f,0x46,0x28,0x69,0x6e,0x70,0x75,0x74,0x5f,0x63,0x2c,0x20,0x53,0x41,0x4d,0x50,0x4c,0x45,0x52,0x2c,0x20,0x28,0x69,0x6e,0x74,0x32,0x29,0x28,0x77,0x69,0x64,0x74,0x68,0x5f,0x62,0x6c,0x6f,0x63,0x6b,0x73,0x5f,0x69,0x64,0x78,0x2c,0x20,0x30,0x29,0x29,0x3b,0xa,0x20,0x20,0x20,0x20,0x46,0x4c,0x4f,0x41,0x54,0x34,0x20,0x72,0x65,0x73,0x75,0x6c,0x74,0x31,0x20,0x3d,0x20,0x72,0x65,0x73,0x75,0x6c,0x74,0x30,0x3b,0xa,0x20,0x20,0x20,0x20,0x46,0x4c,0x4f,0x41,0x54,0x34,0x20,0x72,0x65,0x73,0x75,0x6c,0x74,0x32,0x20,0x3d,0x20,0x72,0x65,0x73,0x75,0x6c,0x74,0x30,0x3b,0xa,0x20,0x20,0x20,0x20,0x46,0x4c,0x4f,0x41,0x54,0x34,0x20,0x72,0x65,0x73,0x75,0x6c,0x74,0x33,0x20,0x3d,0x20,0x72,0x65,0x73,0x75,0x6c,0x74,0x30,0x3b,0xa,0x20,0x20,0x20,0x20,0x23,0x65,0x6c,0x73,0x65,0xa,0x20,0x20,0x20,0x20,0x46,0x4c,0x4f,0x41,0x54,0x34,0x20,0x72,0x65,0x73,0x75,0x6c,0x74,0x30,0x20,0x3d,0x20,0x30,0x3b,0xa,0x20,0x20,0x20,0x20,0x46,0x4c,0x4f,0x41,0x54,0x34,0x20,0x72,0x65,0x73,0x75,0x6c,0x74,0x31,0x20,0x3d,0x20,0x30,0x3b,0xa,0x20,0x20,0x20,0x20,0x46,0x4c,0x4f,0x41,0x54,0x34,0x20,0x72,0x65,0x73,0x75,0x6c,0x74,0x32,0x20,0x3d,0x20,0x30,0x3b,0xa,0x20,0x20,0x20,0x20,0x46,0x4c,0x4f,0x41,0x54,0x34,0x20,0x72,0x65,0x73,0x75,0x6c,0x74,0x33,0x20,0x3d,0x20,0x30,0x3b,0xa,0x20,0x20,0x20,0x20,0x23,0x65,0x6e,0x64,0x69,0x66,0xa,0xa,0x20,0x20,0x20,0x20,0x66,0x6f,0x72,0x20,0x28,0x73,0x68,0x6f,0x72,0x74,0x20,0x70,0x6f,0x73,0x20,0x3d,0x20,0x30,0x3b,0x20,0x70,0x6f,0x73,0x20,0x3c,0x20,0x63,0x68,0x61,0x6e,0x6e,0x65,0x6c,0x5f,0x62,0x6c,0x6f,0x63,0x6b,0x73,0x3b,0x20,0x70,0x6f,0x73,0x20,0x2b,0x3d,0x20,0x31,0x29,0x20,0x7b,0xa,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x46,0x4c,0x4f,0x41,0x54,0x34,0x20,0x61,0x30,0x20,0x3d,0x20,0x52,0x49,0x5f,0x46,0x28,0x69,0x6e,0x70,0x75,0x74,0x5f,0x61,0x2c,0x20,0x53,0x41,0x4d,0x50,0x4c,0x45,0x52,0x2c,0x20,0x28,0x69,0x6e,0x74,0x32,0x29,0x28,0x68,0x65,0x69,0x67,0x68,0x74,0x5f,0x62,0x6c,0x6f,0x63,0x6b,0x73,0x5f,0x69,0x64,0x78,0x2c,0x20,0x34,0x2a,0x70,0x6f,0x73,0x29,0x29,0x3b,0xa,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x46,0x4c,0x4f,0x41,0x54,0x34,0x20,0x61,0x31,0x20,0x3d,0x20,0x52,0x49,0x5f,0x46,0x28,0x69,0x6e,0x70,0x75,0x74,0x5f,0x61,0x2c,0x20,0x53,0x41,0x4d,0x50,0x4c,0x45,0x52,0x2c,0x20,0x28,0x69,0x6e,0x74,0x32,0x29,0x28,0x68,0x65,0x69,0x67,0x68,0x74,0x5f,0x62,0x6c,0x6f,0x63,0x6b,0x73,0x5f,0x69,0x64,0x78,0x2c,0x20,0x34,0x2a,0x70,0x6f,0x73,0x2b,0x31,0x29,0x29,0x3b,0xa,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x46,0x4c,0x4f,0x41,0x54,0x34,0x20,0x61,0x32,0x20,0x3d,0x20,0x52,0x49,0x5f,0x46,0x28,0x69,0x6e,0x70,0x75,0x74,0x5f,0x61,0x2c,0x20,0x53,0x41,0x4d,0x50,0x4c,0x45,0x52,0x2c,0x20,0x28,0x69,0x6e,0x74,0x32,0x29,0x28,0x68,0x65,0x69,0x67,0x68,0x74,0x5f,0x62,0x6c,0x6f,0x63,0x6b,0x73,0x5f,0x69,0x64,0x78,0x2c,0x20,0x34,0x2a,0x70,0x6f,0x73,0x2b,0x32,0x29,0x29,0x3b,0xa,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x46,0x4c,0x4f,0x41,0x54,0x34,0x20,0x61,0x33,0x20,0x3d,0x20,0x52,0x49,0x5f,0x46,0x28,0x69,0x6e,0x70,0x75,0x74,0x5f,0x61,0x2c,0x20,0x53,0x41,0x4d,0x50,0x4c,0x45,0x52,0x2c,0x20,0x28,0x69,0x6e,0x74,0x32,0x29,0x28,0x68,0x65,0x69,0x67,0x68,0x74,0x5f,0x62,0x6c,0x6f,0x63,0x6b,0x73,0x5f,0x69,0x64,0x78,0x2c,0x20,0x34,0x2a,0x70,0x6f,0x73,0x2b,0x33,0x29,0x29,0x3b,0xa,0xa,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x46,0x4c,0x4f,0x41,0x54,0x34,0x20,0x62,0x30,0x20,0x3d,0x20,0x52,0x49,0x5f,0x46,0x28,0x69,0x6e,0x70,0x75,0x74,0x5f,0x62,0x2c,0x20,0x53,0x41,0x4d,0x50,0x4c,0x45,0x52,0x2c,0x20,0x28,0x69,0x6e,0x74,0x32,0x29,0x28,0x70,0x6f,0x73,0x2c,0x20,0x34,0x2a,0x77,0x69,0x64,0x74,0x68,0x5f,0x62,0x6c,0x6f,0x63,0x6b,0x73,0x5f,0x69,0x64,0x78,0x29,0x29,0x3b,0xa,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x46,0x4c,0x4f,0x41,0x54,0x34,0x20,0x62,0x31,0x20,0x3d,0x20,0x52,0x49,0x5f,0x46,0x28,0x69,0x6e,0x70,0x75,0x74,0x5f,0x62,0x2c,0x20,0x53,0x41,0x4d,0x50,0x4c,0x45,0x52,0x2c,0x20,0x28,0x69,0x6e,0x74,0x32,0x29,0x28,0x70,0x6f,0x73,0x2c,0x20,0x34,0x2a,0x77,0x69,0x64,0x74,0x68,0x5f,0x62,0x6c,0x6f,0x63,0x6b,0x73,0x5f,0x69,0x64,0x78,0x2b,0x31,0x29,0x29,0x3b,0xa,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x46,0x4c,0x4f,0x41,0x54,0x34,0x20,0x62,0x32,0x20,0x3d,0x20,0x52,0x49,0x5f,0x46,0x28,0x69,0x6e,0x70,0x75,0x74,0x5f,0x62,0x2c,0x20,0x53,0x41,0x4d,0x50,0x4c,0x45,0x52,0x2c,0x20,0x28,0x69,0x6e,0x74,0x32,0x29,0x28,0x70,0x6f,0x73,0x2c,0x20,0x34,0x2a,0x77,0x69,0x64,0x74,0x68,0x5f,0x62,0x6c,0x6f,0x63,0x6b,0x73,0x5f,0x69,0x64,0x78,0x2b,0x32,0x29,0x29,0x3b,0xa,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x46,0x4c,0x4f,0x41,0x54,0x34,0x20,0x62,0x33,0x20,0x3d,0x20,0x52,0x49,0x5f,0x46,0x28,0x69,0x6e,0x70,0x75,0x74,0x5f,0x62,0x2c,0x20,0x53,0x41,0x4d,0x50,0x4c,0x45,0x52,0x2c,0x20,0x28,0x69,0x6e,0x74,0x32,0x29,0x28,0x70,0x6f,0x73,0x2c,0x20,0x34,0x2a,0x77,0x69,0x64,0x74,0x68,0x5f,0x62,0x6c,0x6f,0x63,0x6b,0x73,0x5f,0x69,0x64,0x78,0x2b,0x33,0x29,0x29,0x3b,0xa,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0xa,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x73,0x68,0x6f,0x72,0x74,0x20,0x72,0x65,0x6d,0x61,0x69,0x6e,0x20,0x3d,0x20,0x28,0x70,0x6f,0x73,0x20,0x2b,0x20,0x31,0x29,0x20,0x2a,0x20,0x34,0x20,0x2d,0x20,0x63,0x68,0x61,0x6e,0x6e,0x65,0x6c,0x73,0x3b,0xa,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x61,0x33,0x20,0x3d,0x20,0x28,0x28,0x72,0x65,0x6d,0x61,0x69,0x6e,0x20,0x3e,0x3d,0x20,0x31,0x29,0x20,0x3f,0x20,0x76,0x5f,0x7a,0x65,0x72,0x6f,0x20,0x3a,0x20,0x61,0x33,0x29,0x3b,0xa,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x61,0x32,0x20,0x3d,0x20,0x28,0x28,0x72,0x65,0x6d,0x61,0x69,0x6e,0x20,0x3e,0x3d,0x20,0x32,0x29,0x20,0x3f,0x20,0x76,0x5f,0x7a,0x65,0x72,0x6f,0x20,0x3a,0x20,0x61,0x32,0x29,0x3b,0xa,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x61,0x31,0x20,0x3d,0x20,0x28,0x28,0x72,0x65,0x6d,0x61,0x69,0x6e,0x20,0x3e,0x3d,0x20,0x33,0x29,0x20,0x3f,0x20,0x76,0x5f,0x7a,0x65,0x72,0x6f,0x20,0x3a,0x20,0x61,0x31,0x29,0x3b,0xa,0xa,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x46,0x4c,0x4f,0x41,0x54,0x34,0x20,0x61,0x30,0x5f,0x74,0x72,0x61,0x6e,0x73,0x20,0x3d,0x20,0x28,0x46,0x4c,0x4f,0x41,0x54,0x34,0x29,0x28,0x61,0x30,0x2e,0x78,0x2c,0x20,0x61,0x31,0x2e,0x78,0x2c,0x20,0x61,0x32,0x2e,0x78,0x2c,0x20,0x61,0x33,0x2e,0x78,0x29,0x3b,0xa,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x46,0x4c,0x4f,0x41,0x54,0x34,0x20,0x61,0x31,0x5f,0x74,0x72,0x61,0x6e,0x73,0x20,0x3d,0x20,0x28,0x46,0x4c,0x4f,0x41,0x54,0x34,0x29,0x28,0x61,0x30,0x2e,0x79,0x2c,0x20,0x61,0x31,0x2e,0x79,0x2c,0x20,0x61,0x32,0x2e,0x79,0x2c,0x20,0x61,0x33,0x2e,0x79,0x29,0x3b,0xa,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x46,0x4c,0x4f,0x41,0x54,0x34,0x20,0x61,0x32,0x5f,0x74,0x72,0x61,0x6e,0x73,0x20,0x3d,0x20,0x28,0x46,0x4c,0x4f,0x41,0x54,0x34,0x29,0x28,0x61,0x30,0x2e,0x7a,0x2c,0x20,0x61,0x31,0x2e,0x7a,0x2c,0x20,0x61,0x32,0x2e,0x7a,0x2c,0x20,0x61,0x33,0x2e,0x7a,0x29,0x3b,0xa,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x46,0x4c,0x4f,0x41,0x54,0x34,0x20,0x61,0x33,0x5f,0x74,0x72,0x61,0x6e,0x73,0x20,0x3d,0x20,0x28,0x46,0x4c,0x4f,0x41,0x54,0x34,0x29,0x28,0x61,0x30,0x2e,0x77,0x2c,0x20,0x61,0x31,0x2e,0x77,0x2c,0x20,0x61,0x32,0x2e,0x77,0x2c,0x20,0x61,0x33,0x2e,0x77,0x29,0x3b,0xa,0xa,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x2f,0x2f,0x6d,0x61,0x74,0x6d,0x75,0x6c,0xa,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x72,0x65,0x73,0x75,0x6c,0x74,0x30,0x2e,0x78,0x20,0x2b,0x3d,0x20,0x64,0x6f,0x74,0x28,0x61,0x30,0x5f,0x74,0x72,0x61,0x6e,0x73,0x2c,0x20,0x62,0x30,0x29,0x3b,0xa,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x72,0x65,0x73,0x75,0x6c,0x74,0x30,0x2e,0x79,0x20,0x2b,0x3d,0x20,0x64,0x6f,0x74,0x28,0x61,0x30,0x5f,0x74,0x72,0x61,0x6e,0x73,0x2c,0x20,0x62,0x31,0x29,0x3b,0xa,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x72,0x65,0x73,0x75,0x6c,0x74,0x30,0x2e,0x7a,0x20,0x2b,0x3d,0x20,0x64,0x6f,0x74,0x28,0x61,0x30,0x5f,0x74,0x72,0x61,0x6e,0x73,0x2c,0x20,0x62,0x32,0x29,0x3b,0xa,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x72,0x65,0x73,0x75,0x6c,0x74,0x30,0x2e,0x77,0x20,0x2b,0x3d,0x20,0x64,0x6f,0x74,0x28,0x61,0x30,0x5f,0x74,0x72,0x61,0x6e,0x73,0x2c,0x20,0x62,0x33,0x29,0x3b,0xa,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0xa,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x72,0x65,0x73,0x75,0x6c,0x74,0x31,0x2e,0x78,0x20,0x2b,0x3d,0x20,0x64,0x6f,0x74,0x28,0x61,0x31,0x5f,0x74,0x72,0x61,0x6e,0x73,0x2c,0x20,0x62,0x30,0x29,0x3b,0xa,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x72,0x65,0x73,0x75,0x6c,0x74,0x31,0x2e,0x79,0x20,0x2b,0x3d,0x20,0x64,0x6f,0x74,0x28,0x61,0x31,0x5f,0x74,0x72,0x61,0x6e,0x73,0x2c,0x20,0x62,0x31,0x29,0x3b,0xa,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x72,0x65,0x73,0x75,0x6c,0x74,0x31,0x2e,0x7a,0x20,0x2b,0x3d,0x20,0x64,0x6f,0x74,0x28,0x61,0x31,0x5f,0x74,0x72,0x61,0x6e,0x73,0x2c,0x20,0x62,0x32,0x29,0x3b,0xa,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x72,0x65,0x73,0x75,0x6c,0x74,0x31,0x2e,0x77,0x20,0x2b,0x3d,0x20,0x64,0x6f,0x74,0x28,0x61,0x31,0x5f,0x74,0x72,0x61,0x6e,0x73,0x2c,0x20,0x62,0x33,0x29,0x3b,0xa,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0xa,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x72,0x65,0x73,0x75,0x6c,0x74,0x32,0x2e,0x78,0x20,0x2b,0x3d,0x20,0x64,0x6f,0x74,0x28,0x61,0x32,0x5f,0x74,0x72,0x61,0x6e,0x73,0x2c,0x20,0x62,0x30,0x29,0x3b,0xa,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x72,0x65,0x73,0x75,0x6c,0x74,0x32,0x2e,0x79,0x20,0x2b,0x3d,0x20,0x64,0x6f,0x74,0x28,0x61,0x32,0x5f,0x74,0x72,0x61,0x6e,0x73,0x2c,0x20,0x62,0x31,0x29,0x3b,0xa,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x72,0x65,0x73,0x75,0x6c,0x74,0x32,0x2e,0x7a,0x20,0x2b,0x3d,0x20,0x64,0x6f,0x74,0x28,0x61,0x32,0x5f,0x74,0x72,0x61,0x6e,0x73,0x2c,0x20,0x62,0x32,0x29,0x3b,0xa,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x72,0x65,0x73,0x75,0x6c,0x74,0x32,0x2e,0x77,0x20,0x2b,0x3d,0x20,0x64,0x6f,0x74,0x28,0x61,0x32,0x5f,0x74,0x72,0x61,0x6e,0x73,0x2c,0x20,0x62,0x33,0x29,0x3b,0xa,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0xa,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x72,0x65,0x73,0x75,0x6c,0x74,0x33,0x2e,0x78,0x20,0x2b,0x3d,0x20,0x64,0x6f,0x74,0x28,0x61,0x33,0x5f,0x74,0x72,0x61,0x6e,0x73,0x2c,0x20,0x62,0x30,0x29,0x3b,0xa,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x72,0x65,0x73,0x75,0x6c,0x74,0x33,0x2e,0x79,0x20,0x2b,0x3d,0x20,0x64,0x6f,0x74,0x28,0x61,0x33,0x5f,0x74,0x72,0x61,0x6e,0x73,0x2c,0x20,0x62,0x31,0x29,0x3b,0xa,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x72,0x65,0x73,0x75,0x6c,0x74,0x33,0x2e,0x7a,0x20,0x2b,0x3d,0x20,0x64,0x6f,0x74,0x28,0x61,0x33,0x5f,0x74,0x72,0x61,0x6e,0x73,0x2c,0x20,0x62,0x32,0x29,0x3b,0xa,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x72,0x65,0x73,0x75,0x6c,0x74,0x33,0x2e,0x77,0x20,0x2b,0x3d,0x20,0x64,0x6f,0x74,0x28,0x61,0x33,0x5f,0x74,0x72,0x61,0x6e,0x73,0x2c,0x20,0x62,0x33,0x29,0x3b,0xa,0x20,0x20,0x20,0x20,0x7d,0xa,0xa,0x20,0x20,0x20,0x20,0x57,0x49,0x5f,0x46,0x28,0x6f,0x75,0x74,0x70,0x75,0x74,0x5f,0x63,0x2c,0x20,0x28,0x69,0x6e,0x74,0x32,0x29,0x28,0x77,0x69,0x64,0x74,0x68,0x5f,0x62,0x6c,0x6f,0x63,0x6b,0x73,0x5f,0x69,0x64,0x78,0x2c,0x20,0x34,0x2a,0x68,0x65,0x69,0x67,0x68,0x74,0x5f,0x62,0x6c,0x6f,0x63,0x6b,0x73,0x5f,0x69,0x64,0x78,0x29,0x2c,0x20,0x72,0x65,0x73,0x75,0x6c,0x74,0x30,0x29,0x3b,0xa,0x20,0x20,0x20,0x20,0x69,0x66,0x28,0x34,0x2a,0x68,0x65,0x69,0x67,0x68,0x74,0x5f,0x62,0x6c,0x6f,0x63,0x6b,0x73,0x5f,0x69,0x64,0x78,0x2b,0x31,0x20,0x3e,0x3d,0x20,0x68,0x65,0x69,0x67,0x68,0x74,0x29,0x20,0x72,0x65,0x74,0x75,0x72,0x6e,0x3b,0xa,0x20,0x20,0x20,0x20,0x57,0x49,0x5f,0x46,0x28,0x6f,0x75,0x74,0x70,0x75,0x74,0x5f,0x63,0x2c,0x20,0x28,0x69,0x6e,0x74,0x32,0x29,0x28,0x77,0x69,0x64,0x74,0x68,0x5f,0x62,0x6c,0x6f,0x63,0x6b,0x73,0x5f,0x69,0x64,0x78,0x2c,0x20,0x34,0x2a,0x68,0x65,0x69,0x67,0x68,0x74,0x5f,0x62,0x6c,0x6f,0x63,0x6b,0x73,0x5f,0x69,0x64,0x78,0x2b,0x31,0x29,0x2c,0x20,0x72,0x65,0x73,0x75,0x6c,0x74,0x31,0x29,0x3b,0xa,0x20,0x20,0x20,0x20,0x69,0x66,0x28,0x34,0x2a,0x68,0x65,0x69,0x67,0x68,0x74,0x5f,0x62,0x6c,0x6f,0x63,0x6b,0x73,0x5f,0x69,0x64,0x78,0x2b,0x32,0x20,0x3e,0x3d,0x20,0x68,0x65,0x69,0x67,0x68,0x74,0x29,0x20,0x72,0x65,0x74,0x75,0x72,0x6e,0x3b,0xa,0x20,0x20,0x20,0x20,0x57,0x49,0x5f,0x46,0x28,0x6f,0x75,0x74,0x70,0x75,0x74,0x5f,0x63,0x2c,0x20,0x28,0x69,0x6e,0x74,0x32,0x29,0x28,0x77,0x69,0x64,0x74,0x68,0x5f,0x62,0x6c,0x6f,0x63,0x6b,0x73,0x5f,0x69,0x64,0x78,0x2c,0x20,0x34,0x2a,0x68,0x65,0x69,0x67,0x68,0x74,0x5f,0x62,0x6c,0x6f,0x63,0x6b,0x73,0x5f,0x69,0x64,0x78,0x2b,0x32,0x29,0x2c,0x20,0x72,0x65,0x73,0x75,0x6c,0x74,0x32,0x29,0x3b,0xa,0x20,0x20,0x20,0x20,0x69,0x66,0x28,0x34,0x2a,0x68,0x65,0x69,0x67,0x68,0x74,0x5f,0x62,0x6c,0x6f,0x63,0x6b,0x73,0x5f,0x69,0x64,0x78,0x2b,0x33,0x20,0x3e,0x3d,0x20,0x68,0x65,0x69,0x67,0x68,0x74,0x29,0x20,0x72,0x65,0x74,0x75,0x72,0x6e,0x3b,0xa,0x20,0x20,0x20,0x20,0x57,0x49,0x5f,0x46,0x28,0x6f,0x75,0x74,0x70,0x75,0x74,0x5f,0x63,0x2c,0x20,0x28,0x69,0x6e,0x74,0x32,0x29,0x28,0x77,0x69,0x64,0x74,0x68,0x5f,0x62,0x6c,0x6f,0x63,0x6b,0x73,0x5f,0x69,0x64,0x78,0x2c,0x20,0x34,0x2a,0x68,0x65,0x69,0x67,0x68,0x74,0x5f,0x62,0x6c,0x6f,0x63,0x6b,0x73,0x5f,0x69,0x64,0x78,0x2b,0x33,0x29,0x2c,0x20,0x72,0x65,0x73,0x75,0x6c,0x74,0x33,0x29,0x3b,0xa,0x7d,0xa, } @@ -144,12 +154,12 @@ extern const std::map> OpenCLProgramMap #endif { "reduction", - { 0x2f,0x2f,0x20,0x54,0x4f,0x44,0x4f,0x3a,0x20,0x75,0x73,0x65,0x20,0x49,0x4e,0x49,0x54,0x5f,0x53,0x43,0x41,0x4c,0x41,0x52,0x5f,0x56,0x41,0x4c,0x55,0x45,0x2c,0x20,0x4f,0x50,0x45,0x52,0x41,0x54,0x4f,0x52,0x2c,0x20,0x46,0x49,0x4e,0x41,0x4c,0x5f,0x4f,0x50,0x45,0x52,0x41,0x54,0x4f,0x52,0x5f,0x4f,0x4e,0x5f,0x43,0x48,0x41,0x4e,0x4e,0x45,0x4c,0x20,0x6d,0x61,0x63,0x72,0x6f,0x20,0x61,0x62,0x73,0x74,0x72,0x61,0x63,0x74,0x20,0x61,0x6e,0x64,0x20,0x73,0x69,0x6d,0x70,0x6c,0x69,0x66,0x79,0x20,0x63,0x6f,0x64,0x65,0xa,0x2f,0x2f,0x20,0x54,0x4f,0x44,0x4f,0x3a,0x20,0x73,0x75,0x70,0x70,0x6f,0x72,0x74,0x20,0x72,0x65,0x64,0x75,0x63,0x65,0x20,0x64,0x69,0x6d,0x73,0x20,0x69,0x6e,0x63,0x6c,0x75,0x64,0x65,0x20,0x62,0x61,0x74,0x63,0x68,0xa,0x2f,0x2f,0x20,0x54,0x4f,0x44,0x4f,0x3a,0x20,0x73,0x75,0x70,0x70,0x6f,0x72,0x74,0x20,0x6b,0x65,0x65,0x70,0x5f,0x64,0x69,0x6d,0x3d,0x46,0x61,0x6c,0x73,0x65,0xa,0x2f,0x2f,0x20,0x54,0x4f,0x44,0x4f,0x3a,0x20,0x66,0x69,0x78,0x20,0x63,0x68,0x61,0x6e,0x6e,0x65,0x6c,0x20,0x72,0x65,0x64,0x75,0x63,0x65,0x20,0x72,0x65,0x73,0x75,0x6c,0x74,0x20,0x72,0x65,0x2d,0x70,0x61,0x63,0x6b,0x20,0x70,0x72,0x6f,0x62,0x6c,0x65,0x6d,0xa,0x23,0x69,0x66,0x64,0x65,0x66,0x20,0x4d,0x4e,0x4e,0x5f,0x53,0x55,0x50,0x50,0x4f,0x52,0x54,0x5f,0x46,0x50,0x31,0x36,0xa,0x23,0x70,0x72,0x61,0x67,0x6d,0x61,0x20,0x4f,0x50,0x45,0x4e,0x43,0x4c,0x20,0x45,0x58,0x54,0x45,0x4e,0x53,0x49,0x4f,0x4e,0x20,0x63,0x6c,0x5f,0x6b,0x68,0x72,0x5f,0x66,0x70,0x31,0x36,0x20,0x3a,0x20,0x65,0x6e,0x61,0x62,0x6c,0x65,0xa,0x23,0x65,0x6e,0x64,0x69,0x66,0xa,0x23,0x64,0x65,0x66,0x69,0x6e,0x65,0x20,0x47,0x4c,0x4f,0x42,0x41,0x4c,0x5f,0x53,0x49,0x5a,0x45,0x5f,0x33,0x5f,0x44,0x49,0x4d,0x53,0x20,0x5c,0xa,0x20,0x20,0x20,0x20,0x5f,0x5f,0x70,0x72,0x69,0x76,0x61,0x74,0x65,0x20,0x63,0x6f,0x6e,0x73,0x74,0x20,0x69,0x6e,0x74,0x20,0x67,0x6c,0x6f,0x62,0x61,0x6c,0x5f,0x73,0x69,0x7a,0x65,0x5f,0x64,0x69,0x6d,0x30,0x2c,0x20,0x5f,0x5f,0x70,0x72,0x69,0x76,0x61,0x74,0x65,0x20,0x63,0x6f,0x6e,0x73,0x74,0x20,0x69,0x6e,0x74,0x20,0x67,0x6c,0x6f,0x62,0x61,0x6c,0x5f,0x73,0x69,0x7a,0x65,0x5f,0x64,0x69,0x6d,0x31,0x2c,0x20,0x5f,0x5f,0x70,0x72,0x69,0x76,0x61,0x74,0x65,0x20,0x63,0x6f,0x6e,0x73,0x74,0x20,0x69,0x6e,0x74,0x20,0x67,0x6c,0x6f,0x62,0x61,0x6c,0x5f,0x73,0x69,0x7a,0x65,0x5f,0x64,0x69,0x6d,0x32,0x2c,0xa,0xa,0x23,0x64,0x65,0x66,0x69,0x6e,0x65,0x20,0x47,0x4c,0x4f,0x42,0x41,0x4c,0x5f,0x53,0x49,0x5a,0x45,0x5f,0x32,0x5f,0x44,0x49,0x4d,0x53,0x20,0x5c,0xa,0x5f,0x5f,0x70,0x72,0x69,0x76,0x61,0x74,0x65,0x20,0x63,0x6f,0x6e,0x73,0x74,0x20,0x69,0x6e,0x74,0x20,0x67,0x6c,0x6f,0x62,0x61,0x6c,0x5f,0x73,0x69,0x7a,0x65,0x5f,0x64,0x69,0x6d,0x30,0x2c,0x20,0x5f,0x5f,0x70,0x72,0x69,0x76,0x61,0x74,0x65,0x20,0x63,0x6f,0x6e,0x73,0x74,0x20,0x69,0x6e,0x74,0x20,0x67,0x6c,0x6f,0x62,0x61,0x6c,0x5f,0x73,0x69,0x7a,0x65,0x5f,0x64,0x69,0x6d,0x31,0x2c,0xa,0xa,0x5f,0x5f,0x63,0x6f,0x6e,0x73,0x74,0x61,0x6e,0x74,0x20,0x73,0x61,0x6d,0x70,0x6c,0x65,0x72,0x5f,0x74,0x20,0x53,0x41,0x4d,0x50,0x4c,0x45,0x52,0x20,0x3d,0x20,0x43,0x4c,0x4b,0x5f,0x4e,0x4f,0x52,0x4d,0x41,0x4c,0x49,0x5a,0x45,0x44,0x5f,0x43,0x4f,0x4f,0x52,0x44,0x53,0x5f,0x46,0x41,0x4c,0x53,0x45,0x20,0x7c,0x20,0x43,0x4c,0x4b,0x5f,0x41,0x44,0x44,0x52,0x45,0x53,0x53,0x5f,0x43,0x4c,0x41,0x4d,0x50,0x20,0x7c,0x20,0x43,0x4c,0x4b,0x5f,0x46,0x49,0x4c,0x54,0x45,0x52,0x5f,0x4e,0x45,0x41,0x52,0x45,0x53,0x54,0x3b,0xa,0xa,0xa,0x5f,0x5f,0x6b,0x65,0x72,0x6e,0x65,0x6c,0x20,0x76,0x6f,0x69,0x64,0x20,0x72,0x65,0x64,0x75,0x63,0x74,0x5f,0x67,0x65,0x6e,0x65,0x72,0x61,0x6c,0x5f,0x6d,0x65,0x61,0x6e,0x28,0x47,0x4c,0x4f,0x42,0x41,0x4c,0x5f,0x53,0x49,0x5a,0x45,0x5f,0x32,0x5f,0x44,0x49,0x4d,0x53,0xa,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x5f,0x5f,0x72,0x65,0x61,0x64,0x5f,0x6f,0x6e,0x6c,0x79,0x20,0x69,0x6d,0x61,0x67,0x65,0x32,0x64,0x5f,0x74,0x20,0x69,0x6e,0x70,0x75,0x74,0x2c,0xa,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x5f,0x5f,0x77,0x72,0x69,0x74,0x65,0x5f,0x6f,0x6e,0x6c,0x79,0x20,0x69,0x6d,0x61,0x67,0x65,0x32,0x64,0x5f,0x74,0x20,0x6f,0x75,0x74,0x70,0x75,0x74,0x2c,0xa,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x5f,0x5f,0x70,0x72,0x69,0x76,0x61,0x74,0x65,0x20,0x63,0x6f,0x6e,0x73,0x74,0x20,0x69,0x6e,0x74,0x20,0x62,0x61,0x74,0x63,0x68,0x2c,0xa,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x5f,0x5f,0x70,0x72,0x69,0x76,0x61,0x74,0x65,0x20,0x63,0x6f,0x6e,0x73,0x74,0x20,0x69,0x6e,0x74,0x20,0x68,0x65,0x69,0x67,0x68,0x74,0x2c,0xa,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x5f,0x5f,0x70,0x72,0x69,0x76,0x61,0x74,0x65,0x20,0x63,0x6f,0x6e,0x73,0x74,0x20,0x69,0x6e,0x74,0x20,0x77,0x69,0x64,0x74,0x68,0xa,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x29,0x20,0x7b,0xa,0x20,0x20,0x20,0x20,0x63,0x6f,0x6e,0x73,0x74,0x20,0x69,0x6e,0x74,0x20,0x62,0x61,0x74,0x63,0x68,0x5f,0x69,0x64,0x78,0x20,0x3d,0x20,0x67,0x65,0x74,0x5f,0x67,0x6c,0x6f,0x62,0x61,0x6c,0x5f,0x69,0x64,0x28,0x30,0x29,0x3b,0xa,0x20,0x20,0x20,0x20,0x63,0x6f,0x6e,0x73,0x74,0x20,0x69,0x6e,0x74,0x20,0x77,0x69,0x64,0x74,0x68,0x5f,0x69,0x64,0x78,0x20,0x3d,0x20,0x67,0x65,0x74,0x5f,0x67,0x6c,0x6f,0x62,0x61,0x6c,0x5f,0x69,0x64,0x28,0x31,0x29,0x3b,0xa,0xa,0x20,0x20,0x20,0x20,0x46,0x4c,0x4f,0x41,0x54,0x20,0x73,0x75,0x6d,0x20,0x3d,0x20,0x30,0x3b,0xa,0x20,0x20,0x20,0x20,0x66,0x6f,0x72,0x20,0x28,0x69,0x6e,0x74,0x20,0x68,0x20,0x3d,0x20,0x30,0x3b,0x20,0x68,0x20,0x3c,0x20,0x68,0x65,0x69,0x67,0x68,0x74,0x3b,0x20,0x68,0x2b,0x2b,0x29,0x20,0x7b,0xa,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x46,0x4c,0x4f,0x41,0x54,0x34,0x20,0x69,0x6e,0x20,0x3d,0x20,0x52,0x49,0x5f,0x46,0x28,0x69,0x6e,0x70,0x75,0x74,0x2c,0x20,0x53,0x41,0x4d,0x50,0x4c,0x45,0x52,0x2c,0x20,0x28,0x69,0x6e,0x74,0x32,0x29,0x28,0x77,0x69,0x64,0x74,0x68,0x5f,0x69,0x64,0x78,0x2c,0x20,0x62,0x61,0x74,0x63,0x68,0x5f,0x69,0x64,0x78,0x2a,0x68,0x65,0x69,0x67,0x68,0x74,0x2b,0x68,0x29,0x29,0x3b,0xa,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x73,0x75,0x6d,0x20,0x3d,0x20,0x73,0x75,0x6d,0x20,0x2b,0x20,0x69,0x6e,0x2e,0x78,0x3b,0xa,0x20,0x20,0x20,0x20,0x7d,0xa,0x20,0x20,0x20,0x20,0x57,0x49,0x5f,0x46,0x28,0x6f,0x75,0x74,0x70,0x75,0x74,0x2c,0x20,0x28,0x69,0x6e,0x74,0x32,0x29,0x28,0x77,0x69,0x64,0x74,0x68,0x5f,0x69,0x64,0x78,0x2c,0x20,0x62,0x61,0x74,0x63,0x68,0x5f,0x69,0x64,0x78,0x29,0x2c,0x20,0x28,0x46,0x4c,0x4f,0x41,0x54,0x34,0x29,0x28,0x73,0x75,0x6d,0x2f,0x68,0x65,0x69,0x67,0x68,0x74,0x2c,0x20,0x30,0x2e,0x30,0x2c,0x20,0x30,0x2e,0x30,0x2c,0x20,0x30,0x2e,0x30,0x29,0x29,0x3b,0xa,0x7d,0xa,0x5f,0x5f,0x6b,0x65,0x72,0x6e,0x65,0x6c,0x20,0x76,0x6f,0x69,0x64,0x20,0x72,0x65,0x64,0x75,0x63,0x74,0x5f,0x67,0x65,0x6e,0x65,0x72,0x61,0x6c,0x5f,0x73,0x75,0x6d,0x28,0x47,0x4c,0x4f,0x42,0x41,0x4c,0x5f,0x53,0x49,0x5a,0x45,0x5f,0x32,0x5f,0x44,0x49,0x4d,0x53,0xa,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x5f,0x5f,0x72,0x65,0x61,0x64,0x5f,0x6f,0x6e,0x6c,0x79,0x20,0x69,0x6d,0x61,0x67,0x65,0x32,0x64,0x5f,0x74,0x20,0x69,0x6e,0x70,0x75,0x74,0x2c,0xa,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x5f,0x5f,0x77,0x72,0x69,0x74,0x65,0x5f,0x6f,0x6e,0x6c,0x79,0x20,0x69,0x6d,0x61,0x67,0x65,0x32,0x64,0x5f,0x74,0x20,0x6f,0x75,0x74,0x70,0x75,0x74,0x2c,0xa,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x5f,0x5f,0x70,0x72,0x69,0x76,0x61,0x74,0x65,0x20,0x63,0x6f,0x6e,0x73,0x74,0x20,0x69,0x6e,0x74,0x20,0x62,0x61,0x74,0x63,0x68,0x2c,0xa,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x5f,0x5f,0x70,0x72,0x69,0x76,0x61,0x74,0x65,0x20,0x63,0x6f,0x6e,0x73,0x74,0x20,0x69,0x6e,0x74,0x20,0x68,0x65,0x69,0x67,0x68,0x74,0x2c,0xa,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x5f,0x5f,0x70,0x72,0x69,0x76,0x61,0x74,0x65,0x20,0x63,0x6f,0x6e,0x73,0x74,0x20,0x69,0x6e,0x74,0x20,0x77,0x69,0x64,0x74,0x68,0xa,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x29,0x20,0x7b,0xa,0x20,0x20,0x20,0x20,0x63,0x6f,0x6e,0x73,0x74,0x20,0x69,0x6e,0x74,0x20,0x62,0x61,0x74,0x63,0x68,0x5f,0x69,0x64,0x78,0x20,0x3d,0x20,0x67,0x65,0x74,0x5f,0x67,0x6c,0x6f,0x62,0x61,0x6c,0x5f,0x69,0x64,0x28,0x30,0x29,0x3b,0xa,0x20,0x20,0x20,0x20,0x63,0x6f,0x6e,0x73,0x74,0x20,0x69,0x6e,0x74,0x20,0x77,0x69,0x64,0x74,0x68,0x5f,0x69,0x64,0x78,0x20,0x3d,0x20,0x67,0x65,0x74,0x5f,0x67,0x6c,0x6f,0x62,0x61,0x6c,0x5f,0x69,0x64,0x28,0x31,0x29,0x3b,0xa,0xa,0x20,0x20,0x20,0x20,0x46,0x4c,0x4f,0x41,0x54,0x20,0x73,0x75,0x6d,0x20,0x3d,0x20,0x30,0x3b,0xa,0x20,0x20,0x20,0x20,0x66,0x6f,0x72,0x20,0x28,0x69,0x6e,0x74,0x20,0x68,0x20,0x3d,0x20,0x30,0x3b,0x20,0x68,0x20,0x3c,0x20,0x68,0x65,0x69,0x67,0x68,0x74,0x3b,0x20,0x68,0x2b,0x2b,0x29,0x20,0x7b,0xa,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x46,0x4c,0x4f,0x41,0x54,0x34,0x20,0x69,0x6e,0x20,0x3d,0x20,0x52,0x49,0x5f,0x46,0x28,0x69,0x6e,0x70,0x75,0x74,0x2c,0x20,0x53,0x41,0x4d,0x50,0x4c,0x45,0x52,0x2c,0x20,0x28,0x69,0x6e,0x74,0x32,0x29,0x28,0x77,0x69,0x64,0x74,0x68,0x5f,0x69,0x64,0x78,0x2c,0x20,0x62,0x61,0x74,0x63,0x68,0x5f,0x69,0x64,0x78,0x2a,0x68,0x65,0x69,0x67,0x68,0x74,0x2b,0x68,0x29,0x29,0x3b,0xa,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x73,0x75,0x6d,0x20,0x3d,0x20,0x73,0x75,0x6d,0x20,0x2b,0x20,0x69,0x6e,0x2e,0x78,0x3b,0xa,0x20,0x20,0x20,0x20,0x7d,0xa,0x20,0x20,0x20,0x20,0x57,0x49,0x5f,0x46,0x28,0x6f,0x75,0x74,0x70,0x75,0x74,0x2c,0x20,0x28,0x69,0x6e,0x74,0x32,0x29,0x28,0x77,0x69,0x64,0x74,0x68,0x5f,0x69,0x64,0x78,0x2c,0x20,0x62,0x61,0x74,0x63,0x68,0x5f,0x69,0x64,0x78,0x29,0x2c,0x20,0x28,0x46,0x4c,0x4f,0x41,0x54,0x34,0x29,0x28,0x73,0x75,0x6d,0x2c,0x20,0x30,0x2e,0x30,0x2c,0x20,0x30,0x2e,0x30,0x2c,0x20,0x30,0x2e,0x30,0x29,0x29,0x3b,0xa,0x7d,0xa,0xa,0x5f,0x5f,0x6b,0x65,0x72,0x6e,0x65,0x6c,0x20,0x76,0x6f,0x69,0x64,0x20,0x72,0x65,0x64,0x75,0x63,0x74,0x5f,0x67,0x65,0x6e,0x65,0x72,0x61,0x6c,0x5f,0x6d,0x61,0x78,0x28,0x47,0x4c,0x4f,0x42,0x41,0x4c,0x5f,0x53,0x49,0x5a,0x45,0x5f,0x32,0x5f,0x44,0x49,0x4d,0x53,0xa,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x5f,0x5f,0x72,0x65,0x61,0x64,0x5f,0x6f,0x6e,0x6c,0x79,0x20,0x69,0x6d,0x61,0x67,0x65,0x32,0x64,0x5f,0x74,0x20,0x69,0x6e,0x70,0x75,0x74,0x2c,0xa,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x5f,0x5f,0x77,0x72,0x69,0x74,0x65,0x5f,0x6f,0x6e,0x6c,0x79,0x20,0x69,0x6d,0x61,0x67,0x65,0x32,0x64,0x5f,0x74,0x20,0x6f,0x75,0x74,0x70,0x75,0x74,0x2c,0xa,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x5f,0x5f,0x70,0x72,0x69,0x76,0x61,0x74,0x65,0x20,0x63,0x6f,0x6e,0x73,0x74,0x20,0x69,0x6e,0x74,0x20,0x62,0x61,0x74,0x63,0x68,0x2c,0xa,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x5f,0x5f,0x70,0x72,0x69,0x76,0x61,0x74,0x65,0x20,0x63,0x6f,0x6e,0x73,0x74,0x20,0x69,0x6e,0x74,0x20,0x68,0x65,0x69,0x67,0x68,0x74,0x2c,0xa,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x5f,0x5f,0x70,0x72,0x69,0x76,0x61,0x74,0x65,0x20,0x63,0x6f,0x6e,0x73,0x74,0x20,0x69,0x6e,0x74,0x20,0x77,0x69,0x64,0x74,0x68,0xa,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x29,0x20,0x7b,0xa,0x20,0x20,0x20,0x20,0x63,0x6f,0x6e,0x73,0x74,0x20,0x69,0x6e,0x74,0x20,0x62,0x61,0x74,0x63,0x68,0x5f,0x69,0x64,0x78,0x20,0x3d,0x20,0x67,0x65,0x74,0x5f,0x67,0x6c,0x6f,0x62,0x61,0x6c,0x5f,0x69,0x64,0x28,0x30,0x29,0x3b,0xa,0x20,0x20,0x20,0x20,0x63,0x6f,0x6e,0x73,0x74,0x20,0x69,0x6e,0x74,0x20,0x77,0x69,0x64,0x74,0x68,0x5f,0x69,0x64,0x78,0x20,0x3d,0x20,0x67,0x65,0x74,0x5f,0x67,0x6c,0x6f,0x62,0x61,0x6c,0x5f,0x69,0x64,0x28,0x31,0x29,0x3b,0xa,0xa,0x20,0x20,0x20,0x20,0x46,0x4c,0x4f,0x41,0x54,0x20,0x73,0x75,0x6d,0x20,0x3d,0x20,0x2d,0x4d,0x41,0x58,0x46,0x4c,0x4f,0x41,0x54,0x3b,0xa,0x20,0x20,0x20,0x20,0x66,0x6f,0x72,0x20,0x28,0x69,0x6e,0x74,0x20,0x68,0x20,0x3d,0x20,0x30,0x3b,0x20,0x68,0x20,0x3c,0x20,0x68,0x65,0x69,0x67,0x68,0x74,0x3b,0x20,0x68,0x2b,0x2b,0x29,0x20,0x7b,0xa,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x46,0x4c,0x4f,0x41,0x54,0x34,0x20,0x69,0x6e,0x20,0x3d,0x20,0x52,0x49,0x5f,0x46,0x28,0x69,0x6e,0x70,0x75,0x74,0x2c,0x20,0x53,0x41,0x4d,0x50,0x4c,0x45,0x52,0x2c,0x20,0x28,0x69,0x6e,0x74,0x32,0x29,0x28,0x77,0x69,0x64,0x74,0x68,0x5f,0x69,0x64,0x78,0x2c,0x20,0x62,0x61,0x74,0x63,0x68,0x5f,0x69,0x64,0x78,0x2a,0x68,0x65,0x69,0x67,0x68,0x74,0x2b,0x68,0x29,0x29,0x3b,0xa,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x73,0x75,0x6d,0x20,0x3d,0x20,0x6d,0x61,0x78,0x28,0x73,0x75,0x6d,0x2c,0x20,0x69,0x6e,0x2e,0x78,0x29,0x3b,0xa,0x20,0x20,0x20,0x20,0x7d,0xa,0x20,0x20,0x20,0x20,0x57,0x49,0x5f,0x46,0x28,0x6f,0x75,0x74,0x70,0x75,0x74,0x2c,0x20,0x28,0x69,0x6e,0x74,0x32,0x29,0x28,0x77,0x69,0x64,0x74,0x68,0x5f,0x69,0x64,0x78,0x2c,0x20,0x62,0x61,0x74,0x63,0x68,0x5f,0x69,0x64,0x78,0x29,0x2c,0x20,0x28,0x46,0x4c,0x4f,0x41,0x54,0x34,0x29,0x28,0x73,0x75,0x6d,0x2c,0x20,0x30,0x2e,0x30,0x2c,0x20,0x30,0x2e,0x30,0x2c,0x20,0x30,0x2e,0x30,0x29,0x29,0x3b,0xa,0x7d,0xa,0xa,0x5f,0x5f,0x6b,0x65,0x72,0x6e,0x65,0x6c,0x20,0x76,0x6f,0x69,0x64,0x20,0x72,0x65,0x64,0x75,0x63,0x74,0x5f,0x67,0x65,0x6e,0x65,0x72,0x61,0x6c,0x5f,0x6d,0x69,0x6e,0x28,0x47,0x4c,0x4f,0x42,0x41,0x4c,0x5f,0x53,0x49,0x5a,0x45,0x5f,0x32,0x5f,0x44,0x49,0x4d,0x53,0xa,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x5f,0x5f,0x72,0x65,0x61,0x64,0x5f,0x6f,0x6e,0x6c,0x79,0x20,0x69,0x6d,0x61,0x67,0x65,0x32,0x64,0x5f,0x74,0x20,0x69,0x6e,0x70,0x75,0x74,0x2c,0xa,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x5f,0x5f,0x77,0x72,0x69,0x74,0x65,0x5f,0x6f,0x6e,0x6c,0x79,0x20,0x69,0x6d,0x61,0x67,0x65,0x32,0x64,0x5f,0x74,0x20,0x6f,0x75,0x74,0x70,0x75,0x74,0x2c,0xa,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x5f,0x5f,0x70,0x72,0x69,0x76,0x61,0x74,0x65,0x20,0x63,0x6f,0x6e,0x73,0x74,0x20,0x69,0x6e,0x74,0x20,0x62,0x61,0x74,0x63,0x68,0x2c,0xa,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x5f,0x5f,0x70,0x72,0x69,0x76,0x61,0x74,0x65,0x20,0x63,0x6f,0x6e,0x73,0x74,0x20,0x69,0x6e,0x74,0x20,0x68,0x65,0x69,0x67,0x68,0x74,0x2c,0xa,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x5f,0x5f,0x70,0x72,0x69,0x76,0x61,0x74,0x65,0x20,0x63,0x6f,0x6e,0x73,0x74,0x20,0x69,0x6e,0x74,0x20,0x77,0x69,0x64,0x74,0x68,0xa,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x29,0x20,0x7b,0xa,0x20,0x20,0x20,0x20,0x63,0x6f,0x6e,0x73,0x74,0x20,0x69,0x6e,0x74,0x20,0x62,0x61,0x74,0x63,0x68,0x5f,0x69,0x64,0x78,0x20,0x3d,0x20,0x67,0x65,0x74,0x5f,0x67,0x6c,0x6f,0x62,0x61,0x6c,0x5f,0x69,0x64,0x28,0x30,0x29,0x3b,0xa,0x20,0x20,0x20,0x20,0x63,0x6f,0x6e,0x73,0x74,0x20,0x69,0x6e,0x74,0x20,0x77,0x69,0x64,0x74,0x68,0x5f,0x69,0x64,0x78,0x20,0x3d,0x20,0x67,0x65,0x74,0x5f,0x67,0x6c,0x6f,0x62,0x61,0x6c,0x5f,0x69,0x64,0x28,0x31,0x29,0x3b,0xa,0xa,0x20,0x20,0x20,0x20,0x46,0x4c,0x4f,0x41,0x54,0x20,0x73,0x75,0x6d,0x20,0x3d,0x20,0x4d,0x41,0x58,0x46,0x4c,0x4f,0x41,0x54,0x3b,0xa,0x20,0x20,0x20,0x20,0x66,0x6f,0x72,0x20,0x28,0x69,0x6e,0x74,0x20,0x68,0x20,0x3d,0x20,0x30,0x3b,0x20,0x68,0x20,0x3c,0x20,0x68,0x65,0x69,0x67,0x68,0x74,0x3b,0x20,0x68,0x2b,0x2b,0x29,0x20,0x7b,0xa,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x46,0x4c,0x4f,0x41,0x54,0x34,0x20,0x69,0x6e,0x20,0x3d,0x20,0x52,0x49,0x5f,0x46,0x28,0x69,0x6e,0x70,0x75,0x74,0x2c,0x20,0x53,0x41,0x4d,0x50,0x4c,0x45,0x52,0x2c,0x20,0x28,0x69,0x6e,0x74,0x32,0x29,0x28,0x77,0x69,0x64,0x74,0x68,0x5f,0x69,0x64,0x78,0x2c,0x20,0x62,0x61,0x74,0x63,0x68,0x5f,0x69,0x64,0x78,0x2a,0x68,0x65,0x69,0x67,0x68,0x74,0x2b,0x68,0x29,0x29,0x3b,0xa,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x73,0x75,0x6d,0x20,0x3d,0x20,0x6d,0x69,0x6e,0x28,0x73,0x75,0x6d,0x2c,0x20,0x69,0x6e,0x2e,0x78,0x29,0x3b,0xa,0x20,0x20,0x20,0x20,0x7d,0xa,0x20,0x20,0x20,0x20,0x57,0x49,0x5f,0x46,0x28,0x6f,0x75,0x74,0x70,0x75,0x74,0x2c,0x20,0x28,0x69,0x6e,0x74,0x32,0x29,0x28,0x77,0x69,0x64,0x74,0x68,0x5f,0x69,0x64,0x78,0x2c,0x20,0x62,0x61,0x74,0x63,0x68,0x5f,0x69,0x64,0x78,0x29,0x2c,0x20,0x28,0x46,0x4c,0x4f,0x41,0x54,0x34,0x29,0x28,0x73,0x75,0x6d,0x2c,0x20,0x30,0x2e,0x30,0x2c,0x20,0x30,0x2e,0x30,0x2c,0x20,0x30,0x2e,0x30,0x29,0x29,0x3b,0xa,0x7d,0xa,0xa,0x5f,0x5f,0x6b,0x65,0x72,0x6e,0x65,0x6c,0x20,0x76,0x6f,0x69,0x64,0x20,0x72,0x65,0x64,0x75,0x63,0x74,0x5f,0x67,0x65,0x6e,0x65,0x72,0x61,0x6c,0x5f,0x6d,0x75,0x6c,0x28,0x47,0x4c,0x4f,0x42,0x41,0x4c,0x5f,0x53,0x49,0x5a,0x45,0x5f,0x32,0x5f,0x44,0x49,0x4d,0x53,0xa,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x5f,0x5f,0x72,0x65,0x61,0x64,0x5f,0x6f,0x6e,0x6c,0x79,0x20,0x69,0x6d,0x61,0x67,0x65,0x32,0x64,0x5f,0x74,0x20,0x69,0x6e,0x70,0x75,0x74,0x2c,0xa,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x5f,0x5f,0x77,0x72,0x69,0x74,0x65,0x5f,0x6f,0x6e,0x6c,0x79,0x20,0x69,0x6d,0x61,0x67,0x65,0x32,0x64,0x5f,0x74,0x20,0x6f,0x75,0x74,0x70,0x75,0x74,0x2c,0xa,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x5f,0x5f,0x70,0x72,0x69,0x76,0x61,0x74,0x65,0x20,0x63,0x6f,0x6e,0x73,0x74,0x20,0x69,0x6e,0x74,0x20,0x62,0x61,0x74,0x63,0x68,0x2c,0xa,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x5f,0x5f,0x70,0x72,0x69,0x76,0x61,0x74,0x65,0x20,0x63,0x6f,0x6e,0x73,0x74,0x20,0x69,0x6e,0x74,0x20,0x68,0x65,0x69,0x67,0x68,0x74,0x2c,0xa,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x5f,0x5f,0x70,0x72,0x69,0x76,0x61,0x74,0x65,0x20,0x63,0x6f,0x6e,0x73,0x74,0x20,0x69,0x6e,0x74,0x20,0x77,0x69,0x64,0x74,0x68,0xa,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x29,0x20,0x7b,0xa,0x20,0x20,0x20,0x20,0x63,0x6f,0x6e,0x73,0x74,0x20,0x69,0x6e,0x74,0x20,0x62,0x61,0x74,0x63,0x68,0x5f,0x69,0x64,0x78,0x20,0x3d,0x20,0x67,0x65,0x74,0x5f,0x67,0x6c,0x6f,0x62,0x61,0x6c,0x5f,0x69,0x64,0x28,0x30,0x29,0x3b,0xa,0x20,0x20,0x20,0x20,0x63,0x6f,0x6e,0x73,0x74,0x20,0x69,0x6e,0x74,0x20,0x77,0x69,0x64,0x74,0x68,0x5f,0x69,0x64,0x78,0x20,0x3d,0x20,0x67,0x65,0x74,0x5f,0x67,0x6c,0x6f,0x62,0x61,0x6c,0x5f,0x69,0x64,0x28,0x31,0x29,0x3b,0xa,0xa,0x20,0x20,0x20,0x20,0x46,0x4c,0x4f,0x41,0x54,0x20,0x73,0x75,0x6d,0x20,0x3d,0x20,0x31,0x2e,0x30,0x3b,0xa,0x20,0x20,0x20,0x20,0x66,0x6f,0x72,0x20,0x28,0x69,0x6e,0x74,0x20,0x68,0x20,0x3d,0x20,0x30,0x3b,0x20,0x68,0x20,0x3c,0x20,0x68,0x65,0x69,0x67,0x68,0x74,0x3b,0x20,0x68,0x2b,0x2b,0x29,0x20,0x7b,0xa,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x46,0x4c,0x4f,0x41,0x54,0x34,0x20,0x69,0x6e,0x20,0x3d,0x20,0x52,0x49,0x5f,0x46,0x28,0x69,0x6e,0x70,0x75,0x74,0x2c,0x20,0x53,0x41,0x4d,0x50,0x4c,0x45,0x52,0x2c,0x20,0x28,0x69,0x6e,0x74,0x32,0x29,0x28,0x77,0x69,0x64,0x74,0x68,0x5f,0x69,0x64,0x78,0x2c,0x20,0x62,0x61,0x74,0x63,0x68,0x5f,0x69,0x64,0x78,0x2a,0x68,0x65,0x69,0x67,0x68,0x74,0x2b,0x68,0x29,0x29,0x3b,0xa,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x73,0x75,0x6d,0x20,0x3d,0x20,0x73,0x75,0x6d,0x20,0x2a,0x20,0x69,0x6e,0x2e,0x78,0x3b,0xa,0x20,0x20,0x20,0x20,0x7d,0xa,0x20,0x20,0x20,0x20,0x57,0x49,0x5f,0x46,0x28,0x6f,0x75,0x74,0x70,0x75,0x74,0x2c,0x20,0x28,0x69,0x6e,0x74,0x32,0x29,0x28,0x77,0x69,0x64,0x74,0x68,0x5f,0x69,0x64,0x78,0x2c,0x20,0x62,0x61,0x74,0x63,0x68,0x5f,0x69,0x64,0x78,0x29,0x2c,0x20,0x28,0x46,0x4c,0x4f,0x41,0x54,0x34,0x29,0x28,0x73,0x75,0x6d,0x2c,0x20,0x30,0x2e,0x30,0x2c,0x20,0x30,0x2e,0x30,0x2c,0x20,0x30,0x2e,0x30,0x29,0x29,0x3b,0xa,0x7d,0xa,0xa,0x5f,0x5f,0x6b,0x65,0x72,0x6e,0x65,0x6c,0x20,0x76,0x6f,0x69,0x64,0x20,0x72,0x65,0x64,0x75,0x63,0x74,0x5f,0x67,0x65,0x6e,0x65,0x72,0x61,0x6c,0x5f,0x6d,0x65,0x61,0x6e,0x5f,0x6c,0x6f,0x63,0x61,0x6c,0x28,0x47,0x4c,0x4f,0x42,0x41,0x4c,0x5f,0x53,0x49,0x5a,0x45,0x5f,0x32,0x5f,0x44,0x49,0x4d,0x53,0xa,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x5f,0x5f,0x72,0x65,0x61,0x64,0x5f,0x6f,0x6e,0x6c,0x79,0x20,0x69,0x6d,0x61,0x67,0x65,0x32,0x64,0x5f,0x74,0x20,0x69,0x6e,0x70,0x75,0x74,0x2c,0xa,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x5f,0x5f,0x77,0x72,0x69,0x74,0x65,0x5f,0x6f,0x6e,0x6c,0x79,0x20,0x69,0x6d,0x61,0x67,0x65,0x32,0x64,0x5f,0x74,0x20,0x6f,0x75,0x74,0x70,0x75,0x74,0x2c,0xa,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x5f,0x5f,0x70,0x72,0x69,0x76,0x61,0x74,0x65,0x20,0x63,0x6f,0x6e,0x73,0x74,0x20,0x69,0x6e,0x74,0x20,0x62,0x61,0x74,0x63,0x68,0x2c,0xa,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x5f,0x5f,0x70,0x72,0x69,0x76,0x61,0x74,0x65,0x20,0x63,0x6f,0x6e,0x73,0x74,0x20,0x69,0x6e,0x74,0x20,0x68,0x65,0x69,0x67,0x68,0x74,0x2c,0xa,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x5f,0x5f,0x70,0x72,0x69,0x76,0x61,0x74,0x65,0x20,0x63,0x6f,0x6e,0x73,0x74,0x20,0x69,0x6e,0x74,0x20,0x77,0x69,0x64,0x74,0x68,0xa,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x29,0x20,0x7b,0xa,0x20,0x20,0x20,0x20,0x63,0x6f,0x6e,0x73,0x74,0x20,0x69,0x6e,0x74,0x20,0x62,0x61,0x74,0x63,0x68,0x5f,0x69,0x64,0x78,0x20,0x3d,0x20,0x67,0x65,0x74,0x5f,0x67,0x6c,0x6f,0x62,0x61,0x6c,0x5f,0x69,0x64,0x28,0x31,0x29,0x3b,0xa,0x20,0x20,0x20,0x20,0x63,0x6f,0x6e,0x73,0x74,0x20,0x69,0x6e,0x74,0x20,0x77,0x69,0x64,0x74,0x68,0x5f,0x69,0x64,0x78,0x20,0x3d,0x20,0x67,0x65,0x74,0x5f,0x67,0x6c,0x6f,0x62,0x61,0x6c,0x5f,0x69,0x64,0x28,0x32,0x29,0x3b,0xa,0x20,0x20,0x20,0x20,0xa,0x20,0x20,0x20,0x20,0x63,0x6f,0x6e,0x73,0x74,0x20,0x69,0x6e,0x74,0x20,0x69,0x64,0x78,0x20,0x3d,0x20,0x67,0x65,0x74,0x5f,0x6c,0x6f,0x63,0x61,0x6c,0x5f,0x69,0x64,0x28,0x30,0x29,0x3b,0xa,0x20,0x20,0x20,0x20,0x46,0x4c,0x4f,0x41,0x54,0x20,0x6c,0x6f,0x63,0x61,0x6c,0x20,0x73,0x75,0x6d,0x5b,0x32,0x35,0x36,0x5d,0x3b,0xa,0x20,0x20,0x20,0x20,0x73,0x75,0x6d,0x5b,0x69,0x64,0x78,0x5d,0x20,0x3d,0x20,0x30,0x2e,0x30,0x3b,0xa,0x20,0x20,0x20,0x20,0x63,0x6f,0x6e,0x73,0x74,0x20,0x69,0x6e,0x74,0x20,0x72,0x65,0x64,0x75,0x63,0x65,0x5f,0x6e,0x75,0x6d,0x20,0x3d,0x20,0x67,0x65,0x74,0x5f,0x6c,0x6f,0x63,0x61,0x6c,0x5f,0x73,0x69,0x7a,0x65,0x28,0x30,0x29,0x3b,0xa,0xa,0x20,0x20,0x20,0x20,0x66,0x6f,0x72,0x20,0x28,0x69,0x6e,0x74,0x20,0x68,0x20,0x3d,0x20,0x69,0x64,0x78,0x3b,0x20,0x68,0x20,0x3c,0x20,0x68,0x65,0x69,0x67,0x68,0x74,0x3b,0x20,0x68,0x2b,0x3d,0x72,0x65,0x64,0x75,0x63,0x65,0x5f,0x6e,0x75,0x6d,0x29,0x20,0x7b,0xa,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x46,0x4c,0x4f,0x41,0x54,0x34,0x20,0x69,0x6e,0x20,0x3d,0x20,0x52,0x49,0x5f,0x46,0x28,0x69,0x6e,0x70,0x75,0x74,0x2c,0x20,0x53,0x41,0x4d,0x50,0x4c,0x45,0x52,0x2c,0x20,0x28,0x69,0x6e,0x74,0x32,0x29,0x28,0x77,0x69,0x64,0x74,0x68,0x5f,0x69,0x64,0x78,0x2c,0x20,0x62,0x61,0x74,0x63,0x68,0x5f,0x69,0x64,0x78,0x2a,0x68,0x65,0x69,0x67,0x68,0x74,0x2b,0x68,0x29,0x29,0x3b,0xa,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x73,0x75,0x6d,0x5b,0x69,0x64,0x78,0x5d,0x20,0x3d,0x20,0x73,0x75,0x6d,0x5b,0x69,0x64,0x78,0x5d,0x20,0x2b,0x20,0x69,0x6e,0x2e,0x78,0x3b,0xa,0x20,0x20,0x20,0x20,0x7d,0xa,0x20,0x20,0x20,0x20,0xa,0x20,0x20,0x20,0x20,0x62,0x61,0x72,0x72,0x69,0x65,0x72,0x28,0x43,0x4c,0x4b,0x5f,0x4c,0x4f,0x43,0x41,0x4c,0x5f,0x4d,0x45,0x4d,0x5f,0x46,0x45,0x4e,0x43,0x45,0x29,0x3b,0xa,0x20,0x20,0x20,0x20,0x66,0x6f,0x72,0x28,0x69,0x6e,0x74,0x20,0x69,0x20,0x3d,0x20,0x72,0x65,0x64,0x75,0x63,0x65,0x5f,0x6e,0x75,0x6d,0x2f,0x32,0x3b,0x20,0x69,0x20,0x3e,0x20,0x30,0x3b,0x20,0x69,0x20,0x2f,0x3d,0x20,0x32,0x29,0x7b,0xa,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x69,0x66,0x20,0x28,0x69,0x64,0x78,0x20,0x3c,0x20,0x69,0x29,0xa,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x73,0x75,0x6d,0x5b,0x69,0x64,0x78,0x5d,0x20,0x3d,0x20,0x73,0x75,0x6d,0x5b,0x69,0x64,0x78,0x5d,0x20,0x2b,0x20,0x73,0x75,0x6d,0x5b,0x69,0x64,0x78,0x20,0x2b,0x20,0x69,0x5d,0x3b,0xa,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x62,0x61,0x72,0x72,0x69,0x65,0x72,0x28,0x43,0x4c,0x4b,0x5f,0x4c,0x4f,0x43,0x41,0x4c,0x5f,0x4d,0x45,0x4d,0x5f,0x46,0x45,0x4e,0x43,0x45,0x29,0x3b,0xa,0x20,0x20,0x20,0x20,0x7d,0xa,0x20,0x20,0x20,0x20,0x69,0x66,0x20,0x28,0x69,0x64,0x78,0x20,0x3d,0x3d,0x20,0x30,0x29,0x20,0x7b,0xa,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x57,0x49,0x5f,0x46,0x28,0x6f,0x75,0x74,0x70,0x75,0x74,0x2c,0x20,0x28,0x69,0x6e,0x74,0x32,0x29,0x28,0x77,0x69,0x64,0x74,0x68,0x5f,0x69,0x64,0x78,0x2c,0x20,0x62,0x61,0x74,0x63,0x68,0x5f,0x69,0x64,0x78,0x29,0x2c,0x20,0x28,0x46,0x4c,0x4f,0x41,0x54,0x34,0x29,0x28,0x73,0x75,0x6d,0x5b,0x30,0x5d,0x2f,0x68,0x65,0x69,0x67,0x68,0x74,0x2c,0x20,0x30,0x2e,0x30,0x2c,0x20,0x30,0x2e,0x30,0x2c,0x20,0x30,0x2e,0x30,0x29,0x29,0x3b,0xa,0x20,0x20,0x20,0x20,0x7d,0xa,0x7d,0xa,0x5f,0x5f,0x6b,0x65,0x72,0x6e,0x65,0x6c,0x20,0x76,0x6f,0x69,0x64,0x20,0x72,0x65,0x64,0x75,0x63,0x74,0x5f,0x67,0x65,0x6e,0x65,0x72,0x61,0x6c,0x5f,0x73,0x75,0x6d,0x5f,0x6c,0x6f,0x63,0x61,0x6c,0x28,0x47,0x4c,0x4f,0x42,0x41,0x4c,0x5f,0x53,0x49,0x5a,0x45,0x5f,0x32,0x5f,0x44,0x49,0x4d,0x53,0xa,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x5f,0x5f,0x72,0x65,0x61,0x64,0x5f,0x6f,0x6e,0x6c,0x79,0x20,0x69,0x6d,0x61,0x67,0x65,0x32,0x64,0x5f,0x74,0x20,0x69,0x6e,0x70,0x75,0x74,0x2c,0xa,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x5f,0x5f,0x77,0x72,0x69,0x74,0x65,0x5f,0x6f,0x6e,0x6c,0x79,0x20,0x69,0x6d,0x61,0x67,0x65,0x32,0x64,0x5f,0x74,0x20,0x6f,0x75,0x74,0x70,0x75,0x74,0x2c,0xa,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x5f,0x5f,0x70,0x72,0x69,0x76,0x61,0x74,0x65,0x20,0x63,0x6f,0x6e,0x73,0x74,0x20,0x69,0x6e,0x74,0x20,0x62,0x61,0x74,0x63,0x68,0x2c,0xa,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x5f,0x5f,0x70,0x72,0x69,0x76,0x61,0x74,0x65,0x20,0x63,0x6f,0x6e,0x73,0x74,0x20,0x69,0x6e,0x74,0x20,0x68,0x65,0x69,0x67,0x68,0x74,0x2c,0xa,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x5f,0x5f,0x70,0x72,0x69,0x76,0x61,0x74,0x65,0x20,0x63,0x6f,0x6e,0x73,0x74,0x20,0x69,0x6e,0x74,0x20,0x77,0x69,0x64,0x74,0x68,0xa,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x29,0x20,0x7b,0xa,0x20,0x20,0x20,0x20,0x63,0x6f,0x6e,0x73,0x74,0x20,0x69,0x6e,0x74,0x20,0x62,0x61,0x74,0x63,0x68,0x5f,0x69,0x64,0x78,0x20,0x3d,0x20,0x67,0x65,0x74,0x5f,0x67,0x6c,0x6f,0x62,0x61,0x6c,0x5f,0x69,0x64,0x28,0x31,0x29,0x3b,0xa,0x20,0x20,0x20,0x20,0x63,0x6f,0x6e,0x73,0x74,0x20,0x69,0x6e,0x74,0x20,0x77,0x69,0x64,0x74,0x68,0x5f,0x69,0x64,0x78,0x20,0x3d,0x20,0x67,0x65,0x74,0x5f,0x67,0x6c,0x6f,0x62,0x61,0x6c,0x5f,0x69,0x64,0x28,0x32,0x29,0x3b,0xa,0xa,0x20,0x20,0x20,0x20,0x63,0x6f,0x6e,0x73,0x74,0x20,0x69,0x6e,0x74,0x20,0x69,0x64,0x78,0x20,0x3d,0x20,0x67,0x65,0x74,0x5f,0x6c,0x6f,0x63,0x61,0x6c,0x5f,0x69,0x64,0x28,0x30,0x29,0x3b,0xa,0x20,0x20,0x20,0x20,0x46,0x4c,0x4f,0x41,0x54,0x20,0x6c,0x6f,0x63,0x61,0x6c,0x20,0x73,0x75,0x6d,0x5b,0x32,0x35,0x36,0x5d,0x3b,0xa,0x20,0x20,0x20,0x20,0x73,0x75,0x6d,0x5b,0x69,0x64,0x78,0x5d,0x20,0x3d,0x20,0x30,0x2e,0x30,0x3b,0xa,0xa,0x20,0x20,0x20,0x20,0x63,0x6f,0x6e,0x73,0x74,0x20,0x69,0x6e,0x74,0x20,0x72,0x65,0x64,0x75,0x63,0x65,0x5f,0x6e,0x75,0x6d,0x20,0x3d,0x20,0x67,0x65,0x74,0x5f,0x6c,0x6f,0x63,0x61,0x6c,0x5f,0x73,0x69,0x7a,0x65,0x28,0x30,0x29,0x3b,0xa,0xa,0x20,0x20,0x20,0x20,0x66,0x6f,0x72,0x20,0x28,0x69,0x6e,0x74,0x20,0x68,0x20,0x3d,0x20,0x69,0x64,0x78,0x3b,0x20,0x68,0x20,0x3c,0x20,0x68,0x65,0x69,0x67,0x68,0x74,0x3b,0x20,0x68,0x2b,0x3d,0x72,0x65,0x64,0x75,0x63,0x65,0x5f,0x6e,0x75,0x6d,0x29,0x20,0x7b,0xa,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x46,0x4c,0x4f,0x41,0x54,0x34,0x20,0x69,0x6e,0x20,0x3d,0x20,0x52,0x49,0x5f,0x46,0x28,0x69,0x6e,0x70,0x75,0x74,0x2c,0x20,0x53,0x41,0x4d,0x50,0x4c,0x45,0x52,0x2c,0x20,0x28,0x69,0x6e,0x74,0x32,0x29,0x28,0x77,0x69,0x64,0x74,0x68,0x5f,0x69,0x64,0x78,0x2c,0x20,0x62,0x61,0x74,0x63,0x68,0x5f,0x69,0x64,0x78,0x2a,0x68,0x65,0x69,0x67,0x68,0x74,0x2b,0x68,0x29,0x29,0x3b,0xa,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x73,0x75,0x6d,0x5b,0x69,0x64,0x78,0x5d,0x20,0x3d,0x20,0x73,0x75,0x6d,0x5b,0x69,0x64,0x78,0x5d,0x20,0x2b,0x20,0x69,0x6e,0x2e,0x78,0x3b,0xa,0x20,0x20,0x20,0x20,0x7d,0xa,0x20,0x20,0x20,0x20,0xa,0x20,0x20,0x20,0x20,0x62,0x61,0x72,0x72,0x69,0x65,0x72,0x28,0x43,0x4c,0x4b,0x5f,0x4c,0x4f,0x43,0x41,0x4c,0x5f,0x4d,0x45,0x4d,0x5f,0x46,0x45,0x4e,0x43,0x45,0x29,0x3b,0xa,0x20,0x20,0x20,0x20,0x66,0x6f,0x72,0x28,0x69,0x6e,0x74,0x20,0x69,0x20,0x3d,0x20,0x72,0x65,0x64,0x75,0x63,0x65,0x5f,0x6e,0x75,0x6d,0x2f,0x32,0x3b,0x20,0x69,0x20,0x3e,0x20,0x30,0x3b,0x20,0x69,0x20,0x2f,0x3d,0x20,0x32,0x29,0x7b,0xa,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x69,0x66,0x20,0x28,0x69,0x64,0x78,0x20,0x3c,0x20,0x69,0x29,0xa,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x73,0x75,0x6d,0x5b,0x69,0x64,0x78,0x5d,0x20,0x3d,0x20,0x73,0x75,0x6d,0x5b,0x69,0x64,0x78,0x5d,0x20,0x2b,0x20,0x73,0x75,0x6d,0x5b,0x69,0x64,0x78,0x20,0x2b,0x20,0x69,0x5d,0x3b,0xa,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x62,0x61,0x72,0x72,0x69,0x65,0x72,0x28,0x43,0x4c,0x4b,0x5f,0x4c,0x4f,0x43,0x41,0x4c,0x5f,0x4d,0x45,0x4d,0x5f,0x46,0x45,0x4e,0x43,0x45,0x29,0x3b,0xa,0x20,0x20,0x20,0x20,0x7d,0xa,0x20,0x20,0x20,0x20,0x69,0x66,0x20,0x28,0x69,0x64,0x78,0x20,0x3d,0x3d,0x20,0x30,0x29,0x20,0x7b,0xa,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x57,0x49,0x5f,0x46,0x28,0x6f,0x75,0x74,0x70,0x75,0x74,0x2c,0x20,0x28,0x69,0x6e,0x74,0x32,0x29,0x28,0x77,0x69,0x64,0x74,0x68,0x5f,0x69,0x64,0x78,0x2c,0x20,0x62,0x61,0x74,0x63,0x68,0x5f,0x69,0x64,0x78,0x29,0x2c,0x20,0x28,0x46,0x4c,0x4f,0x41,0x54,0x34,0x29,0x28,0x73,0x75,0x6d,0x5b,0x30,0x5d,0x2c,0x20,0x30,0x2e,0x30,0x2c,0x20,0x30,0x2e,0x30,0x2c,0x20,0x30,0x2e,0x30,0x29,0x29,0x3b,0xa,0x20,0x20,0x20,0x20,0x7d,0xa,0x7d,0xa,0xa,0x5f,0x5f,0x6b,0x65,0x72,0x6e,0x65,0x6c,0x20,0x76,0x6f,0x69,0x64,0x20,0x72,0x65,0x64,0x75,0x63,0x74,0x5f,0x67,0x65,0x6e,0x65,0x72,0x61,0x6c,0x5f,0x6d,0x61,0x78,0x5f,0x6c,0x6f,0x63,0x61,0x6c,0x28,0x47,0x4c,0x4f,0x42,0x41,0x4c,0x5f,0x53,0x49,0x5a,0x45,0x5f,0x32,0x5f,0x44,0x49,0x4d,0x53,0xa,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x5f,0x5f,0x72,0x65,0x61,0x64,0x5f,0x6f,0x6e,0x6c,0x79,0x20,0x69,0x6d,0x61,0x67,0x65,0x32,0x64,0x5f,0x74,0x20,0x69,0x6e,0x70,0x75,0x74,0x2c,0xa,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x5f,0x5f,0x77,0x72,0x69,0x74,0x65,0x5f,0x6f,0x6e,0x6c,0x79,0x20,0x69,0x6d,0x61,0x67,0x65,0x32,0x64,0x5f,0x74,0x20,0x6f,0x75,0x74,0x70,0x75,0x74,0x2c,0xa,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x5f,0x5f,0x70,0x72,0x69,0x76,0x61,0x74,0x65,0x20,0x63,0x6f,0x6e,0x73,0x74,0x20,0x69,0x6e,0x74,0x20,0x62,0x61,0x74,0x63,0x68,0x2c,0xa,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x5f,0x5f,0x70,0x72,0x69,0x76,0x61,0x74,0x65,0x20,0x63,0x6f,0x6e,0x73,0x74,0x20,0x69,0x6e,0x74,0x20,0x68,0x65,0x69,0x67,0x68,0x74,0x2c,0xa,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x5f,0x5f,0x70,0x72,0x69,0x76,0x61,0x74,0x65,0x20,0x63,0x6f,0x6e,0x73,0x74,0x20,0x69,0x6e,0x74,0x20,0x77,0x69,0x64,0x74,0x68,0xa,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x29,0x20,0x7b,0xa,0x20,0x20,0x20,0x20,0x63,0x6f,0x6e,0x73,0x74,0x20,0x69,0x6e,0x74,0x20,0x62,0x61,0x74,0x63,0x68,0x5f,0x69,0x64,0x78,0x20,0x3d,0x20,0x67,0x65,0x74,0x5f,0x67,0x6c,0x6f,0x62,0x61,0x6c,0x5f,0x69,0x64,0x28,0x31,0x29,0x3b,0xa,0x20,0x20,0x20,0x20,0x63,0x6f,0x6e,0x73,0x74,0x20,0x69,0x6e,0x74,0x20,0x77,0x69,0x64,0x74,0x68,0x5f,0x69,0x64,0x78,0x20,0x3d,0x20,0x67,0x65,0x74,0x5f,0x67,0x6c,0x6f,0x62,0x61,0x6c,0x5f,0x69,0x64,0x28,0x32,0x29,0x3b,0xa,0xa,0x20,0x20,0x20,0x20,0x63,0x6f,0x6e,0x73,0x74,0x20,0x69,0x6e,0x74,0x20,0x69,0x64,0x78,0x20,0x3d,0x20,0x67,0x65,0x74,0x5f,0x6c,0x6f,0x63,0x61,0x6c,0x5f,0x69,0x64,0x28,0x30,0x29,0x3b,0xa,0x20,0x20,0x20,0x20,0x46,0x4c,0x4f,0x41,0x54,0x20,0x6c,0x6f,0x63,0x61,0x6c,0x20,0x73,0x75,0x6d,0x5b,0x32,0x35,0x36,0x5d,0x3b,0xa,0x20,0x20,0x20,0x20,0x73,0x75,0x6d,0x5b,0x69,0x64,0x78,0x5d,0x20,0x3d,0x20,0x2d,0x4d,0x41,0x58,0x46,0x4c,0x4f,0x41,0x54,0x3b,0xa,0x20,0x20,0x20,0x20,0x63,0x6f,0x6e,0x73,0x74,0x20,0x69,0x6e,0x74,0x20,0x72,0x65,0x64,0x75,0x63,0x65,0x5f,0x6e,0x75,0x6d,0x20,0x3d,0x20,0x67,0x65,0x74,0x5f,0x6c,0x6f,0x63,0x61,0x6c,0x5f,0x73,0x69,0x7a,0x65,0x28,0x30,0x29,0x3b,0xa,0xa,0x20,0x20,0x20,0x20,0x66,0x6f,0x72,0x20,0x28,0x69,0x6e,0x74,0x20,0x68,0x20,0x3d,0x20,0x69,0x64,0x78,0x3b,0x20,0x68,0x20,0x3c,0x20,0x68,0x65,0x69,0x67,0x68,0x74,0x3b,0x20,0x68,0x2b,0x3d,0x72,0x65,0x64,0x75,0x63,0x65,0x5f,0x6e,0x75,0x6d,0x29,0x20,0x7b,0xa,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x46,0x4c,0x4f,0x41,0x54,0x34,0x20,0x69,0x6e,0x20,0x3d,0x20,0x52,0x49,0x5f,0x46,0x28,0x69,0x6e,0x70,0x75,0x74,0x2c,0x20,0x53,0x41,0x4d,0x50,0x4c,0x45,0x52,0x2c,0x20,0x28,0x69,0x6e,0x74,0x32,0x29,0x28,0x77,0x69,0x64,0x74,0x68,0x5f,0x69,0x64,0x78,0x2c,0x20,0x62,0x61,0x74,0x63,0x68,0x5f,0x69,0x64,0x78,0x2a,0x68,0x65,0x69,0x67,0x68,0x74,0x2b,0x68,0x29,0x29,0x3b,0xa,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x73,0x75,0x6d,0x5b,0x69,0x64,0x78,0x5d,0x20,0x3d,0x20,0x6d,0x61,0x78,0x28,0x73,0x75,0x6d,0x5b,0x69,0x64,0x78,0x5d,0x2c,0x20,0x69,0x6e,0x2e,0x78,0x29,0x3b,0xa,0x20,0x20,0x20,0x20,0x7d,0xa,0x20,0x20,0x20,0x20,0xa,0x20,0x20,0x20,0x20,0x62,0x61,0x72,0x72,0x69,0x65,0x72,0x28,0x43,0x4c,0x4b,0x5f,0x4c,0x4f,0x43,0x41,0x4c,0x5f,0x4d,0x45,0x4d,0x5f,0x46,0x45,0x4e,0x43,0x45,0x29,0x3b,0xa,0x20,0x20,0x20,0x20,0x66,0x6f,0x72,0x28,0x69,0x6e,0x74,0x20,0x69,0x20,0x3d,0x20,0x72,0x65,0x64,0x75,0x63,0x65,0x5f,0x6e,0x75,0x6d,0x2f,0x32,0x3b,0x20,0x69,0x20,0x3e,0x20,0x30,0x3b,0x20,0x69,0x20,0x2f,0x3d,0x20,0x32,0x29,0x7b,0xa,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x69,0x66,0x20,0x28,0x69,0x64,0x78,0x20,0x3c,0x20,0x69,0x29,0xa,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x73,0x75,0x6d,0x5b,0x69,0x64,0x78,0x5d,0x20,0x3d,0x20,0x6d,0x61,0x78,0x28,0x73,0x75,0x6d,0x5b,0x69,0x64,0x78,0x5d,0x2c,0x20,0x73,0x75,0x6d,0x5b,0x69,0x64,0x78,0x20,0x2b,0x20,0x69,0x5d,0x29,0x3b,0xa,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x62,0x61,0x72,0x72,0x69,0x65,0x72,0x28,0x43,0x4c,0x4b,0x5f,0x4c,0x4f,0x43,0x41,0x4c,0x5f,0x4d,0x45,0x4d,0x5f,0x46,0x45,0x4e,0x43,0x45,0x29,0x3b,0xa,0x20,0x20,0x20,0x20,0x7d,0xa,0x20,0x20,0x20,0x20,0x69,0x66,0x20,0x28,0x69,0x64,0x78,0x20,0x3d,0x3d,0x20,0x30,0x29,0x20,0x7b,0xa,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x57,0x49,0x5f,0x46,0x28,0x6f,0x75,0x74,0x70,0x75,0x74,0x2c,0x20,0x28,0x69,0x6e,0x74,0x32,0x29,0x28,0x77,0x69,0x64,0x74,0x68,0x5f,0x69,0x64,0x78,0x2c,0x20,0x62,0x61,0x74,0x63,0x68,0x5f,0x69,0x64,0x78,0x29,0x2c,0x20,0x28,0x46,0x4c,0x4f,0x41,0x54,0x34,0x29,0x28,0x73,0x75,0x6d,0x5b,0x30,0x5d,0x2c,0x20,0x30,0x2e,0x30,0x2c,0x20,0x30,0x2e,0x30,0x2c,0x20,0x30,0x2e,0x30,0x29,0x29,0x3b,0xa,0x20,0x20,0x20,0x20,0x7d,0xa,0x20,0x20,0x20,0x20,0xa,0x7d,0xa,0xa,0x5f,0x5f,0x6b,0x65,0x72,0x6e,0x65,0x6c,0x20,0x76,0x6f,0x69,0x64,0x20,0x72,0x65,0x64,0x75,0x63,0x74,0x5f,0x67,0x65,0x6e,0x65,0x72,0x61,0x6c,0x5f,0x6d,0x69,0x6e,0x5f,0x6c,0x6f,0x63,0x61,0x6c,0x28,0x47,0x4c,0x4f,0x42,0x41,0x4c,0x5f,0x53,0x49,0x5a,0x45,0x5f,0x32,0x5f,0x44,0x49,0x4d,0x53,0xa,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x5f,0x5f,0x72,0x65,0x61,0x64,0x5f,0x6f,0x6e,0x6c,0x79,0x20,0x69,0x6d,0x61,0x67,0x65,0x32,0x64,0x5f,0x74,0x20,0x69,0x6e,0x70,0x75,0x74,0x2c,0xa,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x5f,0x5f,0x77,0x72,0x69,0x74,0x65,0x5f,0x6f,0x6e,0x6c,0x79,0x20,0x69,0x6d,0x61,0x67,0x65,0x32,0x64,0x5f,0x74,0x20,0x6f,0x75,0x74,0x70,0x75,0x74,0x2c,0xa,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x5f,0x5f,0x70,0x72,0x69,0x76,0x61,0x74,0x65,0x20,0x63,0x6f,0x6e,0x73,0x74,0x20,0x69,0x6e,0x74,0x20,0x62,0x61,0x74,0x63,0x68,0x2c,0xa,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x5f,0x5f,0x70,0x72,0x69,0x76,0x61,0x74,0x65,0x20,0x63,0x6f,0x6e,0x73,0x74,0x20,0x69,0x6e,0x74,0x20,0x68,0x65,0x69,0x67,0x68,0x74,0x2c,0xa,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x5f,0x5f,0x70,0x72,0x69,0x76,0x61,0x74,0x65,0x20,0x63,0x6f,0x6e,0x73,0x74,0x20,0x69,0x6e,0x74,0x20,0x77,0x69,0x64,0x74,0x68,0xa,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x29,0x20,0x7b,0xa,0x20,0x20,0x20,0x20,0x63,0x6f,0x6e,0x73,0x74,0x20,0x69,0x6e,0x74,0x20,0x62,0x61,0x74,0x63,0x68,0x5f,0x69,0x64,0x78,0x20,0x3d,0x20,0x67,0x65,0x74,0x5f,0x67,0x6c,0x6f,0x62,0x61,0x6c,0x5f,0x69,0x64,0x28,0x31,0x29,0x3b,0xa,0x20,0x20,0x20,0x20,0x63,0x6f,0x6e,0x73,0x74,0x20,0x69,0x6e,0x74,0x20,0x77,0x69,0x64,0x74,0x68,0x5f,0x69,0x64,0x78,0x20,0x3d,0x20,0x67,0x65,0x74,0x5f,0x67,0x6c,0x6f,0x62,0x61,0x6c,0x5f,0x69,0x64,0x28,0x32,0x29,0x3b,0xa,0x20,0x20,0x20,0x20,0xa,0x20,0x20,0x20,0x20,0x63,0x6f,0x6e,0x73,0x74,0x20,0x69,0x6e,0x74,0x20,0x69,0x64,0x78,0x20,0x3d,0x20,0x67,0x65,0x74,0x5f,0x6c,0x6f,0x63,0x61,0x6c,0x5f,0x69,0x64,0x28,0x30,0x29,0x3b,0xa,0x20,0x20,0x20,0x20,0x46,0x4c,0x4f,0x41,0x54,0x20,0x6c,0x6f,0x63,0x61,0x6c,0x20,0x73,0x75,0x6d,0x5b,0x32,0x35,0x36,0x5d,0x3b,0xa,0x20,0x20,0x20,0x20,0x73,0x75,0x6d,0x5b,0x69,0x64,0x78,0x5d,0x20,0x3d,0x20,0x4d,0x41,0x58,0x46,0x4c,0x4f,0x41,0x54,0x3b,0xa,0xa,0x20,0x20,0x20,0x20,0x63,0x6f,0x6e,0x73,0x74,0x20,0x69,0x6e,0x74,0x20,0x72,0x65,0x64,0x75,0x63,0x65,0x5f,0x6e,0x75,0x6d,0x20,0x3d,0x20,0x67,0x65,0x74,0x5f,0x6c,0x6f,0x63,0x61,0x6c,0x5f,0x73,0x69,0x7a,0x65,0x28,0x30,0x29,0x3b,0xa,0xa,0x20,0x20,0x20,0x20,0x66,0x6f,0x72,0x20,0x28,0x69,0x6e,0x74,0x20,0x68,0x20,0x3d,0x20,0x69,0x64,0x78,0x3b,0x20,0x68,0x20,0x3c,0x20,0x68,0x65,0x69,0x67,0x68,0x74,0x3b,0x20,0x68,0x2b,0x3d,0x72,0x65,0x64,0x75,0x63,0x65,0x5f,0x6e,0x75,0x6d,0x29,0x20,0x7b,0xa,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x46,0x4c,0x4f,0x41,0x54,0x34,0x20,0x69,0x6e,0x20,0x3d,0x20,0x52,0x49,0x5f,0x46,0x28,0x69,0x6e,0x70,0x75,0x74,0x2c,0x20,0x53,0x41,0x4d,0x50,0x4c,0x45,0x52,0x2c,0x20,0x28,0x69,0x6e,0x74,0x32,0x29,0x28,0x77,0x69,0x64,0x74,0x68,0x5f,0x69,0x64,0x78,0x2c,0x20,0x62,0x61,0x74,0x63,0x68,0x5f,0x69,0x64,0x78,0x2a,0x68,0x65,0x69,0x67,0x68,0x74,0x2b,0x68,0x29,0x29,0x3b,0xa,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x73,0x75,0x6d,0x5b,0x69,0x64,0x78,0x5d,0x20,0x3d,0x20,0x6d,0x69,0x6e,0x28,0x73,0x75,0x6d,0x5b,0x69,0x64,0x78,0x5d,0x2c,0x20,0x69,0x6e,0x2e,0x78,0x29,0x3b,0xa,0x20,0x20,0x20,0x20,0x7d,0xa,0x20,0x20,0x20,0x20,0xa,0x20,0x20,0x20,0x20,0x62,0x61,0x72,0x72,0x69,0x65,0x72,0x28,0x43,0x4c,0x4b,0x5f,0x4c,0x4f,0x43,0x41,0x4c,0x5f,0x4d,0x45,0x4d,0x5f,0x46,0x45,0x4e,0x43,0x45,0x29,0x3b,0xa,0x20,0x20,0x20,0x20,0x66,0x6f,0x72,0x28,0x69,0x6e,0x74,0x20,0x69,0x20,0x3d,0x20,0x72,0x65,0x64,0x75,0x63,0x65,0x5f,0x6e,0x75,0x6d,0x2f,0x32,0x3b,0x20,0x69,0x20,0x3e,0x20,0x30,0x3b,0x20,0x69,0x20,0x2f,0x3d,0x20,0x32,0x29,0x7b,0xa,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x69,0x66,0x20,0x28,0x69,0x64,0x78,0x20,0x3c,0x20,0x69,0x29,0xa,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x73,0x75,0x6d,0x5b,0x69,0x64,0x78,0x5d,0x20,0x3d,0x20,0x6d,0x69,0x6e,0x28,0x73,0x75,0x6d,0x5b,0x69,0x64,0x78,0x5d,0x2c,0x20,0x73,0x75,0x6d,0x5b,0x69,0x64,0x78,0x20,0x2b,0x20,0x69,0x5d,0x29,0x3b,0xa,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x62,0x61,0x72,0x72,0x69,0x65,0x72,0x28,0x43,0x4c,0x4b,0x5f,0x4c,0x4f,0x43,0x41,0x4c,0x5f,0x4d,0x45,0x4d,0x5f,0x46,0x45,0x4e,0x43,0x45,0x29,0x3b,0xa,0x20,0x20,0x20,0x20,0x7d,0xa,0x20,0x20,0x20,0x20,0x69,0x66,0x20,0x28,0x69,0x64,0x78,0x20,0x3d,0x3d,0x20,0x30,0x29,0x20,0x7b,0xa,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x57,0x49,0x5f,0x46,0x28,0x6f,0x75,0x74,0x70,0x75,0x74,0x2c,0x20,0x28,0x69,0x6e,0x74,0x32,0x29,0x28,0x77,0x69,0x64,0x74,0x68,0x5f,0x69,0x64,0x78,0x2c,0x20,0x62,0x61,0x74,0x63,0x68,0x5f,0x69,0x64,0x78,0x29,0x2c,0x20,0x28,0x46,0x4c,0x4f,0x41,0x54,0x34,0x29,0x28,0x73,0x75,0x6d,0x5b,0x30,0x5d,0x2c,0x20,0x30,0x2e,0x30,0x2c,0x20,0x30,0x2e,0x30,0x2c,0x20,0x30,0x2e,0x30,0x29,0x29,0x3b,0xa,0x20,0x20,0x20,0x20,0x7d,0xa,0x7d,0xa,0xa,0x5f,0x5f,0x6b,0x65,0x72,0x6e,0x65,0x6c,0x20,0x76,0x6f,0x69,0x64,0x20,0x72,0x65,0x64,0x75,0x63,0x74,0x5f,0x67,0x65,0x6e,0x65,0x72,0x61,0x6c,0x5f,0x6d,0x75,0x6c,0x5f,0x6c,0x6f,0x63,0x61,0x6c,0x28,0x47,0x4c,0x4f,0x42,0x41,0x4c,0x5f,0x53,0x49,0x5a,0x45,0x5f,0x32,0x5f,0x44,0x49,0x4d,0x53,0xa,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x5f,0x5f,0x72,0x65,0x61,0x64,0x5f,0x6f,0x6e,0x6c,0x79,0x20,0x69,0x6d,0x61,0x67,0x65,0x32,0x64,0x5f,0x74,0x20,0x69,0x6e,0x70,0x75,0x74,0x2c,0xa,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x5f,0x5f,0x77,0x72,0x69,0x74,0x65,0x5f,0x6f,0x6e,0x6c,0x79,0x20,0x69,0x6d,0x61,0x67,0x65,0x32,0x64,0x5f,0x74,0x20,0x6f,0x75,0x74,0x70,0x75,0x74,0x2c,0xa,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x5f,0x5f,0x70,0x72,0x69,0x76,0x61,0x74,0x65,0x20,0x63,0x6f,0x6e,0x73,0x74,0x20,0x69,0x6e,0x74,0x20,0x62,0x61,0x74,0x63,0x68,0x2c,0xa,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x5f,0x5f,0x70,0x72,0x69,0x76,0x61,0x74,0x65,0x20,0x63,0x6f,0x6e,0x73,0x74,0x20,0x69,0x6e,0x74,0x20,0x68,0x65,0x69,0x67,0x68,0x74,0x2c,0xa,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x5f,0x5f,0x70,0x72,0x69,0x76,0x61,0x74,0x65,0x20,0x63,0x6f,0x6e,0x73,0x74,0x20,0x69,0x6e,0x74,0x20,0x77,0x69,0x64,0x74,0x68,0xa,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x29,0x20,0x7b,0xa,0x20,0x20,0x20,0x20,0x63,0x6f,0x6e,0x73,0x74,0x20,0x69,0x6e,0x74,0x20,0x62,0x61,0x74,0x63,0x68,0x5f,0x69,0x64,0x78,0x20,0x3d,0x20,0x67,0x65,0x74,0x5f,0x67,0x6c,0x6f,0x62,0x61,0x6c,0x5f,0x69,0x64,0x28,0x31,0x29,0x3b,0xa,0x20,0x20,0x20,0x20,0x63,0x6f,0x6e,0x73,0x74,0x20,0x69,0x6e,0x74,0x20,0x77,0x69,0x64,0x74,0x68,0x5f,0x69,0x64,0x78,0x20,0x3d,0x20,0x67,0x65,0x74,0x5f,0x67,0x6c,0x6f,0x62,0x61,0x6c,0x5f,0x69,0x64,0x28,0x32,0x29,0x3b,0xa,0xa,0x20,0x20,0x20,0x20,0x63,0x6f,0x6e,0x73,0x74,0x20,0x69,0x6e,0x74,0x20,0x69,0x64,0x78,0x20,0x3d,0x20,0x67,0x65,0x74,0x5f,0x6c,0x6f,0x63,0x61,0x6c,0x5f,0x69,0x64,0x28,0x30,0x29,0x3b,0xa,0x20,0x20,0x20,0x20,0x46,0x4c,0x4f,0x41,0x54,0x20,0x6c,0x6f,0x63,0x61,0x6c,0x20,0x73,0x75,0x6d,0x5b,0x32,0x35,0x36,0x5d,0x3b,0xa,0x20,0x20,0x20,0x20,0x73,0x75,0x6d,0x5b,0x69,0x64,0x78,0x5d,0x20,0x3d,0x20,0x31,0x2e,0x30,0x3b,0xa,0xa,0x20,0x20,0x20,0x20,0x63,0x6f,0x6e,0x73,0x74,0x20,0x69,0x6e,0x74,0x20,0x72,0x65,0x64,0x75,0x63,0x65,0x5f,0x6e,0x75,0x6d,0x20,0x3d,0x20,0x67,0x65,0x74,0x5f,0x6c,0x6f,0x63,0x61,0x6c,0x5f,0x73,0x69,0x7a,0x65,0x28,0x30,0x29,0x3b,0xa,0xa,0x20,0x20,0x20,0x20,0x66,0x6f,0x72,0x20,0x28,0x69,0x6e,0x74,0x20,0x68,0x20,0x3d,0x20,0x69,0x64,0x78,0x3b,0x20,0x68,0x20,0x3c,0x20,0x68,0x65,0x69,0x67,0x68,0x74,0x3b,0x20,0x68,0x2b,0x3d,0x72,0x65,0x64,0x75,0x63,0x65,0x5f,0x6e,0x75,0x6d,0x29,0x20,0x7b,0xa,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x46,0x4c,0x4f,0x41,0x54,0x34,0x20,0x69,0x6e,0x20,0x3d,0x20,0x52,0x49,0x5f,0x46,0x28,0x69,0x6e,0x70,0x75,0x74,0x2c,0x20,0x53,0x41,0x4d,0x50,0x4c,0x45,0x52,0x2c,0x20,0x28,0x69,0x6e,0x74,0x32,0x29,0x28,0x77,0x69,0x64,0x74,0x68,0x5f,0x69,0x64,0x78,0x2c,0x20,0x62,0x61,0x74,0x63,0x68,0x5f,0x69,0x64,0x78,0x2a,0x68,0x65,0x69,0x67,0x68,0x74,0x2b,0x68,0x29,0x29,0x3b,0xa,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x73,0x75,0x6d,0x5b,0x69,0x64,0x78,0x5d,0x20,0x3d,0x20,0x73,0x75,0x6d,0x5b,0x69,0x64,0x78,0x5d,0x20,0x2a,0x20,0x69,0x6e,0x2e,0x78,0x3b,0xa,0x20,0x20,0x20,0x20,0x7d,0xa,0x20,0x20,0x20,0x20,0xa,0x20,0x20,0x20,0x20,0x62,0x61,0x72,0x72,0x69,0x65,0x72,0x28,0x43,0x4c,0x4b,0x5f,0x4c,0x4f,0x43,0x41,0x4c,0x5f,0x4d,0x45,0x4d,0x5f,0x46,0x45,0x4e,0x43,0x45,0x29,0x3b,0xa,0x20,0x20,0x20,0x20,0x66,0x6f,0x72,0x28,0x69,0x6e,0x74,0x20,0x69,0x20,0x3d,0x20,0x72,0x65,0x64,0x75,0x63,0x65,0x5f,0x6e,0x75,0x6d,0x2f,0x32,0x3b,0x20,0x69,0x20,0x3e,0x20,0x30,0x3b,0x20,0x69,0x20,0x2f,0x3d,0x20,0x32,0x29,0x7b,0xa,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x69,0x66,0x20,0x28,0x69,0x64,0x78,0x20,0x3c,0x20,0x69,0x29,0xa,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x73,0x75,0x6d,0x5b,0x69,0x64,0x78,0x5d,0x20,0x3d,0x20,0x73,0x75,0x6d,0x5b,0x69,0x64,0x78,0x5d,0x20,0x2a,0x20,0x73,0x75,0x6d,0x5b,0x69,0x64,0x78,0x20,0x2b,0x20,0x69,0x5d,0x3b,0xa,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x62,0x61,0x72,0x72,0x69,0x65,0x72,0x28,0x43,0x4c,0x4b,0x5f,0x4c,0x4f,0x43,0x41,0x4c,0x5f,0x4d,0x45,0x4d,0x5f,0x46,0x45,0x4e,0x43,0x45,0x29,0x3b,0xa,0x20,0x20,0x20,0x20,0x7d,0xa,0x20,0x20,0x20,0x20,0x69,0x66,0x20,0x28,0x69,0x64,0x78,0x20,0x3d,0x3d,0x20,0x30,0x29,0x20,0x7b,0xa,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x57,0x49,0x5f,0x46,0x28,0x6f,0x75,0x74,0x70,0x75,0x74,0x2c,0x20,0x28,0x69,0x6e,0x74,0x32,0x29,0x28,0x77,0x69,0x64,0x74,0x68,0x5f,0x69,0x64,0x78,0x2c,0x20,0x62,0x61,0x74,0x63,0x68,0x5f,0x69,0x64,0x78,0x29,0x2c,0x20,0x28,0x46,0x4c,0x4f,0x41,0x54,0x34,0x29,0x28,0x73,0x75,0x6d,0x5b,0x30,0x5d,0x2c,0x20,0x30,0x2e,0x30,0x2c,0x20,0x30,0x2e,0x30,0x2c,0x20,0x30,0x2e,0x30,0x29,0x29,0x3b,0xa,0x20,0x20,0x20,0x20,0x7d,0xa,0x7d,0xa,0xa, } + { 0x2f,0x2f,0x20,0x54,0x4f,0x44,0x4f,0x3a,0x20,0x75,0x73,0x65,0x20,0x49,0x4e,0x49,0x54,0x5f,0x53,0x43,0x41,0x4c,0x41,0x52,0x5f,0x56,0x41,0x4c,0x55,0x45,0x2c,0x20,0x4f,0x50,0x45,0x52,0x41,0x54,0x4f,0x52,0x2c,0x20,0x46,0x49,0x4e,0x41,0x4c,0x5f,0x4f,0x50,0x45,0x52,0x41,0x54,0x4f,0x52,0x5f,0x4f,0x4e,0x5f,0x43,0x48,0x41,0x4e,0x4e,0x45,0x4c,0x20,0x6d,0x61,0x63,0x72,0x6f,0x20,0x61,0x62,0x73,0x74,0x72,0x61,0x63,0x74,0x20,0x61,0x6e,0x64,0x20,0x73,0x69,0x6d,0x70,0x6c,0x69,0x66,0x79,0x20,0x63,0x6f,0x64,0x65,0xa,0x2f,0x2f,0x20,0x54,0x4f,0x44,0x4f,0x3a,0x20,0x73,0x75,0x70,0x70,0x6f,0x72,0x74,0x20,0x72,0x65,0x64,0x75,0x63,0x65,0x20,0x64,0x69,0x6d,0x73,0x20,0x69,0x6e,0x63,0x6c,0x75,0x64,0x65,0x20,0x62,0x61,0x74,0x63,0x68,0xa,0x2f,0x2f,0x20,0x54,0x4f,0x44,0x4f,0x3a,0x20,0x73,0x75,0x70,0x70,0x6f,0x72,0x74,0x20,0x6b,0x65,0x65,0x70,0x5f,0x64,0x69,0x6d,0x3d,0x46,0x61,0x6c,0x73,0x65,0xa,0x2f,0x2f,0x20,0x54,0x4f,0x44,0x4f,0x3a,0x20,0x66,0x69,0x78,0x20,0x63,0x68,0x61,0x6e,0x6e,0x65,0x6c,0x20,0x72,0x65,0x64,0x75,0x63,0x65,0x20,0x72,0x65,0x73,0x75,0x6c,0x74,0x20,0x72,0x65,0x2d,0x70,0x61,0x63,0x6b,0x20,0x70,0x72,0x6f,0x62,0x6c,0x65,0x6d,0xa,0x23,0x69,0x66,0x64,0x65,0x66,0x20,0x4d,0x4e,0x4e,0x5f,0x53,0x55,0x50,0x50,0x4f,0x52,0x54,0x5f,0x46,0x50,0x31,0x36,0xa,0x23,0x70,0x72,0x61,0x67,0x6d,0x61,0x20,0x4f,0x50,0x45,0x4e,0x43,0x4c,0x20,0x45,0x58,0x54,0x45,0x4e,0x53,0x49,0x4f,0x4e,0x20,0x63,0x6c,0x5f,0x6b,0x68,0x72,0x5f,0x66,0x70,0x31,0x36,0x20,0x3a,0x20,0x65,0x6e,0x61,0x62,0x6c,0x65,0xa,0x23,0x65,0x6e,0x64,0x69,0x66,0xa,0x23,0x64,0x65,0x66,0x69,0x6e,0x65,0x20,0x47,0x4c,0x4f,0x42,0x41,0x4c,0x5f,0x53,0x49,0x5a,0x45,0x5f,0x33,0x5f,0x44,0x49,0x4d,0x53,0x20,0x5c,0xa,0x20,0x20,0x20,0x20,0x5f,0x5f,0x70,0x72,0x69,0x76,0x61,0x74,0x65,0x20,0x63,0x6f,0x6e,0x73,0x74,0x20,0x69,0x6e,0x74,0x20,0x67,0x6c,0x6f,0x62,0x61,0x6c,0x5f,0x73,0x69,0x7a,0x65,0x5f,0x64,0x69,0x6d,0x30,0x2c,0x20,0x5f,0x5f,0x70,0x72,0x69,0x76,0x61,0x74,0x65,0x20,0x63,0x6f,0x6e,0x73,0x74,0x20,0x69,0x6e,0x74,0x20,0x67,0x6c,0x6f,0x62,0x61,0x6c,0x5f,0x73,0x69,0x7a,0x65,0x5f,0x64,0x69,0x6d,0x31,0x2c,0x20,0x5f,0x5f,0x70,0x72,0x69,0x76,0x61,0x74,0x65,0x20,0x63,0x6f,0x6e,0x73,0x74,0x20,0x69,0x6e,0x74,0x20,0x67,0x6c,0x6f,0x62,0x61,0x6c,0x5f,0x73,0x69,0x7a,0x65,0x5f,0x64,0x69,0x6d,0x32,0x2c,0xa,0xa,0x23,0x64,0x65,0x66,0x69,0x6e,0x65,0x20,0x47,0x4c,0x4f,0x42,0x41,0x4c,0x5f,0x53,0x49,0x5a,0x45,0x5f,0x32,0x5f,0x44,0x49,0x4d,0x53,0x20,0x5c,0xa,0x5f,0x5f,0x70,0x72,0x69,0x76,0x61,0x74,0x65,0x20,0x63,0x6f,0x6e,0x73,0x74,0x20,0x69,0x6e,0x74,0x20,0x67,0x6c,0x6f,0x62,0x61,0x6c,0x5f,0x73,0x69,0x7a,0x65,0x5f,0x64,0x69,0x6d,0x30,0x2c,0x20,0x5f,0x5f,0x70,0x72,0x69,0x76,0x61,0x74,0x65,0x20,0x63,0x6f,0x6e,0x73,0x74,0x20,0x69,0x6e,0x74,0x20,0x67,0x6c,0x6f,0x62,0x61,0x6c,0x5f,0x73,0x69,0x7a,0x65,0x5f,0x64,0x69,0x6d,0x31,0x2c,0xa,0xa,0x5f,0x5f,0x63,0x6f,0x6e,0x73,0x74,0x61,0x6e,0x74,0x20,0x73,0x61,0x6d,0x70,0x6c,0x65,0x72,0x5f,0x74,0x20,0x53,0x41,0x4d,0x50,0x4c,0x45,0x52,0x20,0x3d,0x20,0x43,0x4c,0x4b,0x5f,0x4e,0x4f,0x52,0x4d,0x41,0x4c,0x49,0x5a,0x45,0x44,0x5f,0x43,0x4f,0x4f,0x52,0x44,0x53,0x5f,0x46,0x41,0x4c,0x53,0x45,0x20,0x7c,0x20,0x43,0x4c,0x4b,0x5f,0x41,0x44,0x44,0x52,0x45,0x53,0x53,0x5f,0x43,0x4c,0x41,0x4d,0x50,0x20,0x7c,0x20,0x43,0x4c,0x4b,0x5f,0x46,0x49,0x4c,0x54,0x45,0x52,0x5f,0x4e,0x45,0x41,0x52,0x45,0x53,0x54,0x3b,0xa,0xa,0xa,0x5f,0x5f,0x6b,0x65,0x72,0x6e,0x65,0x6c,0x20,0x76,0x6f,0x69,0x64,0x20,0x72,0x65,0x64,0x75,0x63,0x74,0x5f,0x67,0x65,0x6e,0x65,0x72,0x61,0x6c,0x5f,0x6d,0x65,0x61,0x6e,0x28,0x47,0x4c,0x4f,0x42,0x41,0x4c,0x5f,0x53,0x49,0x5a,0x45,0x5f,0x32,0x5f,0x44,0x49,0x4d,0x53,0xa,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x5f,0x5f,0x72,0x65,0x61,0x64,0x5f,0x6f,0x6e,0x6c,0x79,0x20,0x69,0x6d,0x61,0x67,0x65,0x32,0x64,0x5f,0x74,0x20,0x69,0x6e,0x70,0x75,0x74,0x2c,0xa,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x5f,0x5f,0x77,0x72,0x69,0x74,0x65,0x5f,0x6f,0x6e,0x6c,0x79,0x20,0x69,0x6d,0x61,0x67,0x65,0x32,0x64,0x5f,0x74,0x20,0x6f,0x75,0x74,0x70,0x75,0x74,0x2c,0xa,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x5f,0x5f,0x70,0x72,0x69,0x76,0x61,0x74,0x65,0x20,0x63,0x6f,0x6e,0x73,0x74,0x20,0x69,0x6e,0x74,0x20,0x62,0x61,0x74,0x63,0x68,0x2c,0xa,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x5f,0x5f,0x70,0x72,0x69,0x76,0x61,0x74,0x65,0x20,0x63,0x6f,0x6e,0x73,0x74,0x20,0x69,0x6e,0x74,0x20,0x68,0x65,0x69,0x67,0x68,0x74,0x2c,0xa,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x5f,0x5f,0x70,0x72,0x69,0x76,0x61,0x74,0x65,0x20,0x63,0x6f,0x6e,0x73,0x74,0x20,0x69,0x6e,0x74,0x20,0x77,0x69,0x64,0x74,0x68,0x2c,0xa,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x5f,0x5f,0x70,0x72,0x69,0x76,0x61,0x74,0x65,0x20,0x63,0x6f,0x6e,0x73,0x74,0x20,0x69,0x6e,0x74,0x20,0x63,0x68,0x61,0x6e,0x6e,0x65,0x6c,0xa,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x29,0x20,0x7b,0xa,0x20,0x20,0x20,0x20,0x63,0x6f,0x6e,0x73,0x74,0x20,0x69,0x6e,0x74,0x20,0x62,0x61,0x74,0x63,0x68,0x5f,0x69,0x64,0x78,0x20,0x3d,0x20,0x67,0x65,0x74,0x5f,0x67,0x6c,0x6f,0x62,0x61,0x6c,0x5f,0x69,0x64,0x28,0x30,0x29,0x3b,0xa,0x20,0x20,0x20,0x20,0x63,0x6f,0x6e,0x73,0x74,0x20,0x69,0x6e,0x74,0x20,0x77,0x69,0x64,0x74,0x68,0x5f,0x69,0x64,0x78,0x20,0x3d,0x20,0x67,0x65,0x74,0x5f,0x67,0x6c,0x6f,0x62,0x61,0x6c,0x5f,0x69,0x64,0x28,0x31,0x29,0x3b,0xa,0xa,0x20,0x20,0x20,0x20,0x46,0x4c,0x4f,0x41,0x54,0x34,0x20,0x73,0x75,0x6d,0x20,0x3d,0x20,0x30,0x3b,0xa,0x20,0x20,0x20,0x20,0x66,0x6f,0x72,0x20,0x28,0x69,0x6e,0x74,0x20,0x68,0x20,0x3d,0x20,0x30,0x3b,0x20,0x68,0x20,0x3c,0x20,0x68,0x65,0x69,0x67,0x68,0x74,0x3b,0x20,0x68,0x2b,0x2b,0x29,0x20,0x7b,0xa,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x46,0x4c,0x4f,0x41,0x54,0x34,0x20,0x69,0x6e,0x20,0x3d,0x20,0x52,0x49,0x5f,0x46,0x28,0x69,0x6e,0x70,0x75,0x74,0x2c,0x20,0x53,0x41,0x4d,0x50,0x4c,0x45,0x52,0x2c,0x20,0x28,0x69,0x6e,0x74,0x32,0x29,0x28,0x77,0x69,0x64,0x74,0x68,0x5f,0x69,0x64,0x78,0x2c,0x20,0x62,0x61,0x74,0x63,0x68,0x5f,0x69,0x64,0x78,0x2a,0x68,0x65,0x69,0x67,0x68,0x74,0x2b,0x68,0x29,0x29,0x3b,0xa,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x73,0x75,0x6d,0x20,0x3d,0x20,0x73,0x75,0x6d,0x20,0x2b,0x20,0x69,0x6e,0x3b,0xa,0x20,0x20,0x20,0x20,0x7d,0xa,0x20,0x20,0x20,0x20,0x46,0x4c,0x4f,0x41,0x54,0x2a,0x20,0x73,0x75,0x6d,0x5f,0x70,0x74,0x72,0x20,0x3d,0x20,0x28,0x46,0x4c,0x4f,0x41,0x54,0x2a,0x29,0x26,0x73,0x75,0x6d,0x3b,0xa,0x20,0x20,0x20,0x20,0x66,0x6f,0x72,0x28,0x69,0x6e,0x74,0x20,0x69,0x20,0x3d,0x20,0x31,0x3b,0x20,0x69,0x20,0x3c,0x20,0x63,0x68,0x61,0x6e,0x6e,0x65,0x6c,0x3b,0x20,0x2b,0x2b,0x69,0x29,0x7b,0xa,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x73,0x75,0x6d,0x2e,0x78,0x20,0x2b,0x3d,0x20,0x73,0x75,0x6d,0x5f,0x70,0x74,0x72,0x5b,0x69,0x5d,0x3b,0xa,0x20,0x20,0x20,0x20,0x7d,0xa,0x20,0x20,0x20,0x20,0x57,0x49,0x5f,0x46,0x28,0x6f,0x75,0x74,0x70,0x75,0x74,0x2c,0x20,0x28,0x69,0x6e,0x74,0x32,0x29,0x28,0x77,0x69,0x64,0x74,0x68,0x5f,0x69,0x64,0x78,0x2c,0x20,0x62,0x61,0x74,0x63,0x68,0x5f,0x69,0x64,0x78,0x29,0x2c,0x20,0x28,0x46,0x4c,0x4f,0x41,0x54,0x34,0x29,0x28,0x73,0x75,0x6d,0x2e,0x78,0x2f,0x28,0x68,0x65,0x69,0x67,0x68,0x74,0x2a,0x63,0x68,0x61,0x6e,0x6e,0x65,0x6c,0x29,0x2c,0x20,0x30,0x2e,0x30,0x2c,0x20,0x30,0x2e,0x30,0x2c,0x20,0x30,0x2e,0x30,0x29,0x29,0x3b,0xa,0x7d,0xa,0x5f,0x5f,0x6b,0x65,0x72,0x6e,0x65,0x6c,0x20,0x76,0x6f,0x69,0x64,0x20,0x72,0x65,0x64,0x75,0x63,0x74,0x5f,0x67,0x65,0x6e,0x65,0x72,0x61,0x6c,0x5f,0x73,0x75,0x6d,0x28,0x47,0x4c,0x4f,0x42,0x41,0x4c,0x5f,0x53,0x49,0x5a,0x45,0x5f,0x32,0x5f,0x44,0x49,0x4d,0x53,0xa,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x5f,0x5f,0x72,0x65,0x61,0x64,0x5f,0x6f,0x6e,0x6c,0x79,0x20,0x69,0x6d,0x61,0x67,0x65,0x32,0x64,0x5f,0x74,0x20,0x69,0x6e,0x70,0x75,0x74,0x2c,0xa,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x5f,0x5f,0x77,0x72,0x69,0x74,0x65,0x5f,0x6f,0x6e,0x6c,0x79,0x20,0x69,0x6d,0x61,0x67,0x65,0x32,0x64,0x5f,0x74,0x20,0x6f,0x75,0x74,0x70,0x75,0x74,0x2c,0xa,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x5f,0x5f,0x70,0x72,0x69,0x76,0x61,0x74,0x65,0x20,0x63,0x6f,0x6e,0x73,0x74,0x20,0x69,0x6e,0x74,0x20,0x62,0x61,0x74,0x63,0x68,0x2c,0xa,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x5f,0x5f,0x70,0x72,0x69,0x76,0x61,0x74,0x65,0x20,0x63,0x6f,0x6e,0x73,0x74,0x20,0x69,0x6e,0x74,0x20,0x68,0x65,0x69,0x67,0x68,0x74,0x2c,0xa,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x5f,0x5f,0x70,0x72,0x69,0x76,0x61,0x74,0x65,0x20,0x63,0x6f,0x6e,0x73,0x74,0x20,0x69,0x6e,0x74,0x20,0x77,0x69,0x64,0x74,0x68,0x2c,0xa,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x5f,0x5f,0x70,0x72,0x69,0x76,0x61,0x74,0x65,0x20,0x63,0x6f,0x6e,0x73,0x74,0x20,0x69,0x6e,0x74,0x20,0x63,0x68,0x61,0x6e,0x6e,0x65,0x6c,0xa,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x29,0x20,0x7b,0xa,0x20,0x20,0x20,0x20,0x63,0x6f,0x6e,0x73,0x74,0x20,0x69,0x6e,0x74,0x20,0x62,0x61,0x74,0x63,0x68,0x5f,0x69,0x64,0x78,0x20,0x3d,0x20,0x67,0x65,0x74,0x5f,0x67,0x6c,0x6f,0x62,0x61,0x6c,0x5f,0x69,0x64,0x28,0x30,0x29,0x3b,0xa,0x20,0x20,0x20,0x20,0x63,0x6f,0x6e,0x73,0x74,0x20,0x69,0x6e,0x74,0x20,0x77,0x69,0x64,0x74,0x68,0x5f,0x69,0x64,0x78,0x20,0x3d,0x20,0x67,0x65,0x74,0x5f,0x67,0x6c,0x6f,0x62,0x61,0x6c,0x5f,0x69,0x64,0x28,0x31,0x29,0x3b,0xa,0xa,0x20,0x20,0x20,0x20,0x46,0x4c,0x4f,0x41,0x54,0x34,0x20,0x73,0x75,0x6d,0x20,0x3d,0x20,0x30,0x3b,0xa,0x20,0x20,0x20,0x20,0x66,0x6f,0x72,0x20,0x28,0x69,0x6e,0x74,0x20,0x68,0x20,0x3d,0x20,0x30,0x3b,0x20,0x68,0x20,0x3c,0x20,0x68,0x65,0x69,0x67,0x68,0x74,0x3b,0x20,0x68,0x2b,0x2b,0x29,0x20,0x7b,0xa,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x46,0x4c,0x4f,0x41,0x54,0x34,0x20,0x69,0x6e,0x20,0x3d,0x20,0x52,0x49,0x5f,0x46,0x28,0x69,0x6e,0x70,0x75,0x74,0x2c,0x20,0x53,0x41,0x4d,0x50,0x4c,0x45,0x52,0x2c,0x20,0x28,0x69,0x6e,0x74,0x32,0x29,0x28,0x77,0x69,0x64,0x74,0x68,0x5f,0x69,0x64,0x78,0x2c,0x20,0x62,0x61,0x74,0x63,0x68,0x5f,0x69,0x64,0x78,0x2a,0x68,0x65,0x69,0x67,0x68,0x74,0x2b,0x68,0x29,0x29,0x3b,0xa,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x73,0x75,0x6d,0x20,0x3d,0x20,0x73,0x75,0x6d,0x20,0x2b,0x20,0x69,0x6e,0x3b,0xa,0x20,0x20,0x20,0x20,0x7d,0x20,0x20,0x20,0x20,0xa,0x20,0x20,0x20,0x20,0x46,0x4c,0x4f,0x41,0x54,0x2a,0x20,0x73,0x75,0x6d,0x5f,0x70,0x74,0x72,0x20,0x3d,0x20,0x28,0x46,0x4c,0x4f,0x41,0x54,0x2a,0x29,0x26,0x73,0x75,0x6d,0x3b,0xa,0x20,0x20,0x20,0x20,0x66,0x6f,0x72,0x28,0x69,0x6e,0x74,0x20,0x69,0x20,0x3d,0x20,0x31,0x3b,0x20,0x69,0x20,0x3c,0x20,0x63,0x68,0x61,0x6e,0x6e,0x65,0x6c,0x3b,0x20,0x2b,0x2b,0x69,0x29,0x7b,0xa,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x73,0x75,0x6d,0x2e,0x78,0x20,0x2b,0x3d,0x20,0x73,0x75,0x6d,0x5f,0x70,0x74,0x72,0x5b,0x69,0x5d,0x3b,0xa,0x20,0x20,0x20,0x20,0x7d,0xa,0x20,0x20,0x20,0x20,0x57,0x49,0x5f,0x46,0x28,0x6f,0x75,0x74,0x70,0x75,0x74,0x2c,0x20,0x28,0x69,0x6e,0x74,0x32,0x29,0x28,0x77,0x69,0x64,0x74,0x68,0x5f,0x69,0x64,0x78,0x2c,0x20,0x62,0x61,0x74,0x63,0x68,0x5f,0x69,0x64,0x78,0x29,0x2c,0x20,0x28,0x46,0x4c,0x4f,0x41,0x54,0x34,0x29,0x28,0x73,0x75,0x6d,0x2e,0x78,0x2c,0x20,0x30,0x2e,0x30,0x2c,0x20,0x30,0x2e,0x30,0x2c,0x20,0x30,0x2e,0x30,0x29,0x29,0x3b,0xa,0x7d,0xa,0xa,0x5f,0x5f,0x6b,0x65,0x72,0x6e,0x65,0x6c,0x20,0x76,0x6f,0x69,0x64,0x20,0x72,0x65,0x64,0x75,0x63,0x74,0x5f,0x67,0x65,0x6e,0x65,0x72,0x61,0x6c,0x5f,0x6d,0x61,0x78,0x28,0x47,0x4c,0x4f,0x42,0x41,0x4c,0x5f,0x53,0x49,0x5a,0x45,0x5f,0x32,0x5f,0x44,0x49,0x4d,0x53,0xa,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x5f,0x5f,0x72,0x65,0x61,0x64,0x5f,0x6f,0x6e,0x6c,0x79,0x20,0x69,0x6d,0x61,0x67,0x65,0x32,0x64,0x5f,0x74,0x20,0x69,0x6e,0x70,0x75,0x74,0x2c,0xa,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x5f,0x5f,0x77,0x72,0x69,0x74,0x65,0x5f,0x6f,0x6e,0x6c,0x79,0x20,0x69,0x6d,0x61,0x67,0x65,0x32,0x64,0x5f,0x74,0x20,0x6f,0x75,0x74,0x70,0x75,0x74,0x2c,0xa,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x5f,0x5f,0x70,0x72,0x69,0x76,0x61,0x74,0x65,0x20,0x63,0x6f,0x6e,0x73,0x74,0x20,0x69,0x6e,0x74,0x20,0x62,0x61,0x74,0x63,0x68,0x2c,0xa,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x5f,0x5f,0x70,0x72,0x69,0x76,0x61,0x74,0x65,0x20,0x63,0x6f,0x6e,0x73,0x74,0x20,0x69,0x6e,0x74,0x20,0x68,0x65,0x69,0x67,0x68,0x74,0x2c,0xa,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x5f,0x5f,0x70,0x72,0x69,0x76,0x61,0x74,0x65,0x20,0x63,0x6f,0x6e,0x73,0x74,0x20,0x69,0x6e,0x74,0x20,0x77,0x69,0x64,0x74,0x68,0x2c,0xa,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x5f,0x5f,0x70,0x72,0x69,0x76,0x61,0x74,0x65,0x20,0x63,0x6f,0x6e,0x73,0x74,0x20,0x69,0x6e,0x74,0x20,0x63,0x68,0x61,0x6e,0x6e,0x65,0x6c,0xa,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x29,0x20,0x7b,0xa,0x20,0x20,0x20,0x20,0x63,0x6f,0x6e,0x73,0x74,0x20,0x69,0x6e,0x74,0x20,0x62,0x61,0x74,0x63,0x68,0x5f,0x69,0x64,0x78,0x20,0x3d,0x20,0x67,0x65,0x74,0x5f,0x67,0x6c,0x6f,0x62,0x61,0x6c,0x5f,0x69,0x64,0x28,0x30,0x29,0x3b,0xa,0x20,0x20,0x20,0x20,0x63,0x6f,0x6e,0x73,0x74,0x20,0x69,0x6e,0x74,0x20,0x77,0x69,0x64,0x74,0x68,0x5f,0x69,0x64,0x78,0x20,0x3d,0x20,0x67,0x65,0x74,0x5f,0x67,0x6c,0x6f,0x62,0x61,0x6c,0x5f,0x69,0x64,0x28,0x31,0x29,0x3b,0xa,0xa,0x20,0x20,0x20,0x20,0x46,0x4c,0x4f,0x41,0x54,0x34,0x20,0x73,0x75,0x6d,0x20,0x3d,0x20,0x28,0x46,0x4c,0x4f,0x41,0x54,0x34,0x29,0x2d,0x4d,0x41,0x58,0x46,0x4c,0x4f,0x41,0x54,0x3b,0xa,0x20,0x20,0x20,0x20,0x66,0x6f,0x72,0x20,0x28,0x69,0x6e,0x74,0x20,0x68,0x20,0x3d,0x20,0x30,0x3b,0x20,0x68,0x20,0x3c,0x20,0x68,0x65,0x69,0x67,0x68,0x74,0x3b,0x20,0x68,0x2b,0x2b,0x29,0x20,0x7b,0xa,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x46,0x4c,0x4f,0x41,0x54,0x34,0x20,0x69,0x6e,0x20,0x3d,0x20,0x52,0x49,0x5f,0x46,0x28,0x69,0x6e,0x70,0x75,0x74,0x2c,0x20,0x53,0x41,0x4d,0x50,0x4c,0x45,0x52,0x2c,0x20,0x28,0x69,0x6e,0x74,0x32,0x29,0x28,0x77,0x69,0x64,0x74,0x68,0x5f,0x69,0x64,0x78,0x2c,0x20,0x62,0x61,0x74,0x63,0x68,0x5f,0x69,0x64,0x78,0x2a,0x68,0x65,0x69,0x67,0x68,0x74,0x2b,0x68,0x29,0x29,0x3b,0xa,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x73,0x75,0x6d,0x20,0x3d,0x20,0x6d,0x61,0x78,0x28,0x73,0x75,0x6d,0x2c,0x20,0x69,0x6e,0x29,0x3b,0xa,0x20,0x20,0x20,0x20,0x7d,0xa,0x20,0x20,0x20,0x20,0x46,0x4c,0x4f,0x41,0x54,0x2a,0x20,0x73,0x75,0x6d,0x5f,0x70,0x74,0x72,0x20,0x3d,0x20,0x28,0x46,0x4c,0x4f,0x41,0x54,0x2a,0x29,0x26,0x73,0x75,0x6d,0x3b,0xa,0x20,0x20,0x20,0x20,0x66,0x6f,0x72,0x28,0x69,0x6e,0x74,0x20,0x69,0x20,0x3d,0x20,0x31,0x3b,0x20,0x69,0x20,0x3c,0x20,0x63,0x68,0x61,0x6e,0x6e,0x65,0x6c,0x3b,0x20,0x2b,0x2b,0x69,0x29,0x7b,0xa,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x73,0x75,0x6d,0x2e,0x78,0x20,0x3d,0x20,0x6d,0x61,0x78,0x28,0x73,0x75,0x6d,0x2e,0x78,0x2c,0x20,0x73,0x75,0x6d,0x5f,0x70,0x74,0x72,0x5b,0x69,0x5d,0x29,0x3b,0xa,0x20,0x20,0x20,0x20,0x7d,0xa,0x20,0x20,0x20,0x20,0x57,0x49,0x5f,0x46,0x28,0x6f,0x75,0x74,0x70,0x75,0x74,0x2c,0x20,0x28,0x69,0x6e,0x74,0x32,0x29,0x28,0x77,0x69,0x64,0x74,0x68,0x5f,0x69,0x64,0x78,0x2c,0x20,0x62,0x61,0x74,0x63,0x68,0x5f,0x69,0x64,0x78,0x29,0x2c,0x20,0x28,0x46,0x4c,0x4f,0x41,0x54,0x34,0x29,0x28,0x73,0x75,0x6d,0x2e,0x78,0x2c,0x20,0x30,0x2e,0x30,0x2c,0x20,0x30,0x2e,0x30,0x2c,0x20,0x30,0x2e,0x30,0x29,0x29,0x3b,0xa,0x7d,0xa,0xa,0x5f,0x5f,0x6b,0x65,0x72,0x6e,0x65,0x6c,0x20,0x76,0x6f,0x69,0x64,0x20,0x72,0x65,0x64,0x75,0x63,0x74,0x5f,0x67,0x65,0x6e,0x65,0x72,0x61,0x6c,0x5f,0x6d,0x69,0x6e,0x28,0x47,0x4c,0x4f,0x42,0x41,0x4c,0x5f,0x53,0x49,0x5a,0x45,0x5f,0x32,0x5f,0x44,0x49,0x4d,0x53,0xa,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x5f,0x5f,0x72,0x65,0x61,0x64,0x5f,0x6f,0x6e,0x6c,0x79,0x20,0x69,0x6d,0x61,0x67,0x65,0x32,0x64,0x5f,0x74,0x20,0x69,0x6e,0x70,0x75,0x74,0x2c,0xa,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x5f,0x5f,0x77,0x72,0x69,0x74,0x65,0x5f,0x6f,0x6e,0x6c,0x79,0x20,0x69,0x6d,0x61,0x67,0x65,0x32,0x64,0x5f,0x74,0x20,0x6f,0x75,0x74,0x70,0x75,0x74,0x2c,0xa,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x5f,0x5f,0x70,0x72,0x69,0x76,0x61,0x74,0x65,0x20,0x63,0x6f,0x6e,0x73,0x74,0x20,0x69,0x6e,0x74,0x20,0x62,0x61,0x74,0x63,0x68,0x2c,0xa,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x5f,0x5f,0x70,0x72,0x69,0x76,0x61,0x74,0x65,0x20,0x63,0x6f,0x6e,0x73,0x74,0x20,0x69,0x6e,0x74,0x20,0x68,0x65,0x69,0x67,0x68,0x74,0x2c,0xa,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x5f,0x5f,0x70,0x72,0x69,0x76,0x61,0x74,0x65,0x20,0x63,0x6f,0x6e,0x73,0x74,0x20,0x69,0x6e,0x74,0x20,0x77,0x69,0x64,0x74,0x68,0x2c,0xa,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x5f,0x5f,0x70,0x72,0x69,0x76,0x61,0x74,0x65,0x20,0x63,0x6f,0x6e,0x73,0x74,0x20,0x69,0x6e,0x74,0x20,0x63,0x68,0x61,0x6e,0x6e,0x65,0x6c,0xa,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x29,0x20,0x7b,0xa,0x20,0x20,0x20,0x20,0x63,0x6f,0x6e,0x73,0x74,0x20,0x69,0x6e,0x74,0x20,0x62,0x61,0x74,0x63,0x68,0x5f,0x69,0x64,0x78,0x20,0x3d,0x20,0x67,0x65,0x74,0x5f,0x67,0x6c,0x6f,0x62,0x61,0x6c,0x5f,0x69,0x64,0x28,0x30,0x29,0x3b,0xa,0x20,0x20,0x20,0x20,0x63,0x6f,0x6e,0x73,0x74,0x20,0x69,0x6e,0x74,0x20,0x77,0x69,0x64,0x74,0x68,0x5f,0x69,0x64,0x78,0x20,0x3d,0x20,0x67,0x65,0x74,0x5f,0x67,0x6c,0x6f,0x62,0x61,0x6c,0x5f,0x69,0x64,0x28,0x31,0x29,0x3b,0xa,0xa,0x20,0x20,0x20,0x20,0x46,0x4c,0x4f,0x41,0x54,0x34,0x20,0x73,0x75,0x6d,0x20,0x3d,0x20,0x28,0x46,0x4c,0x4f,0x41,0x54,0x34,0x29,0x4d,0x41,0x58,0x46,0x4c,0x4f,0x41,0x54,0x3b,0xa,0x20,0x20,0x20,0x20,0x66,0x6f,0x72,0x20,0x28,0x69,0x6e,0x74,0x20,0x68,0x20,0x3d,0x20,0x30,0x3b,0x20,0x68,0x20,0x3c,0x20,0x68,0x65,0x69,0x67,0x68,0x74,0x3b,0x20,0x68,0x2b,0x2b,0x29,0x20,0x7b,0xa,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x46,0x4c,0x4f,0x41,0x54,0x34,0x20,0x69,0x6e,0x20,0x3d,0x20,0x52,0x49,0x5f,0x46,0x28,0x69,0x6e,0x70,0x75,0x74,0x2c,0x20,0x53,0x41,0x4d,0x50,0x4c,0x45,0x52,0x2c,0x20,0x28,0x69,0x6e,0x74,0x32,0x29,0x28,0x77,0x69,0x64,0x74,0x68,0x5f,0x69,0x64,0x78,0x2c,0x20,0x62,0x61,0x74,0x63,0x68,0x5f,0x69,0x64,0x78,0x2a,0x68,0x65,0x69,0x67,0x68,0x74,0x2b,0x68,0x29,0x29,0x3b,0xa,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x73,0x75,0x6d,0x20,0x3d,0x20,0x6d,0x69,0x6e,0x28,0x73,0x75,0x6d,0x2c,0x20,0x69,0x6e,0x29,0x3b,0xa,0x20,0x20,0x20,0x20,0x7d,0xa,0x20,0x20,0x20,0x20,0x46,0x4c,0x4f,0x41,0x54,0x2a,0x20,0x73,0x75,0x6d,0x5f,0x70,0x74,0x72,0x20,0x3d,0x20,0x28,0x46,0x4c,0x4f,0x41,0x54,0x2a,0x29,0x26,0x73,0x75,0x6d,0x3b,0xa,0x20,0x20,0x20,0x20,0x66,0x6f,0x72,0x28,0x69,0x6e,0x74,0x20,0x69,0x20,0x3d,0x20,0x31,0x3b,0x20,0x69,0x20,0x3c,0x20,0x63,0x68,0x61,0x6e,0x6e,0x65,0x6c,0x3b,0x20,0x2b,0x2b,0x69,0x29,0x7b,0xa,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x73,0x75,0x6d,0x2e,0x78,0x20,0x3d,0x20,0x6d,0x69,0x6e,0x28,0x73,0x75,0x6d,0x2e,0x78,0x2c,0x20,0x73,0x75,0x6d,0x5f,0x70,0x74,0x72,0x5b,0x69,0x5d,0x29,0x3b,0xa,0x20,0x20,0x20,0x20,0x7d,0xa,0x20,0x20,0x20,0x20,0x57,0x49,0x5f,0x46,0x28,0x6f,0x75,0x74,0x70,0x75,0x74,0x2c,0x20,0x28,0x69,0x6e,0x74,0x32,0x29,0x28,0x77,0x69,0x64,0x74,0x68,0x5f,0x69,0x64,0x78,0x2c,0x20,0x62,0x61,0x74,0x63,0x68,0x5f,0x69,0x64,0x78,0x29,0x2c,0x20,0x28,0x46,0x4c,0x4f,0x41,0x54,0x34,0x29,0x28,0x73,0x75,0x6d,0x2e,0x78,0x2c,0x20,0x30,0x2e,0x30,0x2c,0x20,0x30,0x2e,0x30,0x2c,0x20,0x30,0x2e,0x30,0x29,0x29,0x3b,0xa,0x7d,0xa,0xa,0x5f,0x5f,0x6b,0x65,0x72,0x6e,0x65,0x6c,0x20,0x76,0x6f,0x69,0x64,0x20,0x72,0x65,0x64,0x75,0x63,0x74,0x5f,0x67,0x65,0x6e,0x65,0x72,0x61,0x6c,0x5f,0x6d,0x75,0x6c,0x28,0x47,0x4c,0x4f,0x42,0x41,0x4c,0x5f,0x53,0x49,0x5a,0x45,0x5f,0x32,0x5f,0x44,0x49,0x4d,0x53,0xa,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x5f,0x5f,0x72,0x65,0x61,0x64,0x5f,0x6f,0x6e,0x6c,0x79,0x20,0x69,0x6d,0x61,0x67,0x65,0x32,0x64,0x5f,0x74,0x20,0x69,0x6e,0x70,0x75,0x74,0x2c,0xa,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x5f,0x5f,0x77,0x72,0x69,0x74,0x65,0x5f,0x6f,0x6e,0x6c,0x79,0x20,0x69,0x6d,0x61,0x67,0x65,0x32,0x64,0x5f,0x74,0x20,0x6f,0x75,0x74,0x70,0x75,0x74,0x2c,0xa,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x5f,0x5f,0x70,0x72,0x69,0x76,0x61,0x74,0x65,0x20,0x63,0x6f,0x6e,0x73,0x74,0x20,0x69,0x6e,0x74,0x20,0x62,0x61,0x74,0x63,0x68,0x2c,0xa,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x5f,0x5f,0x70,0x72,0x69,0x76,0x61,0x74,0x65,0x20,0x63,0x6f,0x6e,0x73,0x74,0x20,0x69,0x6e,0x74,0x20,0x68,0x65,0x69,0x67,0x68,0x74,0x2c,0xa,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x5f,0x5f,0x70,0x72,0x69,0x76,0x61,0x74,0x65,0x20,0x63,0x6f,0x6e,0x73,0x74,0x20,0x69,0x6e,0x74,0x20,0x77,0x69,0x64,0x74,0x68,0x2c,0xa,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x5f,0x5f,0x70,0x72,0x69,0x76,0x61,0x74,0x65,0x20,0x63,0x6f,0x6e,0x73,0x74,0x20,0x69,0x6e,0x74,0x20,0x63,0x68,0x61,0x6e,0x6e,0x65,0x6c,0xa,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x29,0x20,0x7b,0xa,0x20,0x20,0x20,0x20,0x63,0x6f,0x6e,0x73,0x74,0x20,0x69,0x6e,0x74,0x20,0x62,0x61,0x74,0x63,0x68,0x5f,0x69,0x64,0x78,0x20,0x3d,0x20,0x67,0x65,0x74,0x5f,0x67,0x6c,0x6f,0x62,0x61,0x6c,0x5f,0x69,0x64,0x28,0x30,0x29,0x3b,0xa,0x20,0x20,0x20,0x20,0x63,0x6f,0x6e,0x73,0x74,0x20,0x69,0x6e,0x74,0x20,0x77,0x69,0x64,0x74,0x68,0x5f,0x69,0x64,0x78,0x20,0x3d,0x20,0x67,0x65,0x74,0x5f,0x67,0x6c,0x6f,0x62,0x61,0x6c,0x5f,0x69,0x64,0x28,0x31,0x29,0x3b,0xa,0xa,0x20,0x20,0x20,0x20,0x46,0x4c,0x4f,0x41,0x54,0x34,0x20,0x73,0x75,0x6d,0x20,0x3d,0x20,0x28,0x46,0x4c,0x4f,0x41,0x54,0x34,0x29,0x31,0x2e,0x30,0x3b,0xa,0x20,0x20,0x20,0x20,0x66,0x6f,0x72,0x20,0x28,0x69,0x6e,0x74,0x20,0x68,0x20,0x3d,0x20,0x30,0x3b,0x20,0x68,0x20,0x3c,0x20,0x68,0x65,0x69,0x67,0x68,0x74,0x3b,0x20,0x68,0x2b,0x2b,0x29,0x20,0x7b,0xa,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x46,0x4c,0x4f,0x41,0x54,0x34,0x20,0x69,0x6e,0x20,0x3d,0x20,0x52,0x49,0x5f,0x46,0x28,0x69,0x6e,0x70,0x75,0x74,0x2c,0x20,0x53,0x41,0x4d,0x50,0x4c,0x45,0x52,0x2c,0x20,0x28,0x69,0x6e,0x74,0x32,0x29,0x28,0x77,0x69,0x64,0x74,0x68,0x5f,0x69,0x64,0x78,0x2c,0x20,0x62,0x61,0x74,0x63,0x68,0x5f,0x69,0x64,0x78,0x2a,0x68,0x65,0x69,0x67,0x68,0x74,0x2b,0x68,0x29,0x29,0x3b,0xa,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x73,0x75,0x6d,0x20,0x3d,0x20,0x73,0x75,0x6d,0x20,0x2a,0x20,0x69,0x6e,0x3b,0xa,0x20,0x20,0x20,0x20,0x7d,0xa,0x20,0x20,0x20,0x20,0x46,0x4c,0x4f,0x41,0x54,0x2a,0x20,0x73,0x75,0x6d,0x5f,0x70,0x74,0x72,0x20,0x3d,0x20,0x28,0x46,0x4c,0x4f,0x41,0x54,0x2a,0x29,0x26,0x73,0x75,0x6d,0x3b,0xa,0x20,0x20,0x20,0x20,0x66,0x6f,0x72,0x28,0x69,0x6e,0x74,0x20,0x69,0x20,0x3d,0x20,0x31,0x3b,0x20,0x69,0x20,0x3c,0x20,0x63,0x68,0x61,0x6e,0x6e,0x65,0x6c,0x3b,0x20,0x2b,0x2b,0x69,0x29,0x7b,0xa,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x73,0x75,0x6d,0x2e,0x78,0x20,0x2a,0x3d,0x20,0x73,0x75,0x6d,0x5f,0x70,0x74,0x72,0x5b,0x69,0x5d,0x3b,0xa,0x20,0x20,0x20,0x20,0x7d,0xa,0x20,0x20,0x20,0x20,0x57,0x49,0x5f,0x46,0x28,0x6f,0x75,0x74,0x70,0x75,0x74,0x2c,0x20,0x28,0x69,0x6e,0x74,0x32,0x29,0x28,0x77,0x69,0x64,0x74,0x68,0x5f,0x69,0x64,0x78,0x2c,0x20,0x62,0x61,0x74,0x63,0x68,0x5f,0x69,0x64,0x78,0x29,0x2c,0x20,0x28,0x46,0x4c,0x4f,0x41,0x54,0x34,0x29,0x28,0x73,0x75,0x6d,0x2e,0x78,0x2c,0x20,0x30,0x2e,0x30,0x2c,0x20,0x30,0x2e,0x30,0x2c,0x20,0x30,0x2e,0x30,0x29,0x29,0x3b,0xa,0x7d,0xa,0xa,0x5f,0x5f,0x6b,0x65,0x72,0x6e,0x65,0x6c,0x20,0x76,0x6f,0x69,0x64,0x20,0x72,0x65,0x64,0x75,0x63,0x74,0x5f,0x67,0x65,0x6e,0x65,0x72,0x61,0x6c,0x5f,0x6d,0x65,0x61,0x6e,0x5f,0x6c,0x6f,0x63,0x61,0x6c,0x28,0x47,0x4c,0x4f,0x42,0x41,0x4c,0x5f,0x53,0x49,0x5a,0x45,0x5f,0x32,0x5f,0x44,0x49,0x4d,0x53,0xa,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x5f,0x5f,0x72,0x65,0x61,0x64,0x5f,0x6f,0x6e,0x6c,0x79,0x20,0x69,0x6d,0x61,0x67,0x65,0x32,0x64,0x5f,0x74,0x20,0x69,0x6e,0x70,0x75,0x74,0x2c,0xa,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x5f,0x5f,0x77,0x72,0x69,0x74,0x65,0x5f,0x6f,0x6e,0x6c,0x79,0x20,0x69,0x6d,0x61,0x67,0x65,0x32,0x64,0x5f,0x74,0x20,0x6f,0x75,0x74,0x70,0x75,0x74,0x2c,0xa,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x5f,0x5f,0x70,0x72,0x69,0x76,0x61,0x74,0x65,0x20,0x63,0x6f,0x6e,0x73,0x74,0x20,0x69,0x6e,0x74,0x20,0x62,0x61,0x74,0x63,0x68,0x2c,0xa,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x5f,0x5f,0x70,0x72,0x69,0x76,0x61,0x74,0x65,0x20,0x63,0x6f,0x6e,0x73,0x74,0x20,0x69,0x6e,0x74,0x20,0x68,0x65,0x69,0x67,0x68,0x74,0x2c,0xa,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x5f,0x5f,0x70,0x72,0x69,0x76,0x61,0x74,0x65,0x20,0x63,0x6f,0x6e,0x73,0x74,0x20,0x69,0x6e,0x74,0x20,0x77,0x69,0x64,0x74,0x68,0x2c,0xa,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x5f,0x5f,0x70,0x72,0x69,0x76,0x61,0x74,0x65,0x20,0x63,0x6f,0x6e,0x73,0x74,0x20,0x69,0x6e,0x74,0x20,0x63,0x68,0x61,0x6e,0x6e,0x65,0x6c,0xa,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x29,0x20,0x7b,0xa,0x20,0x20,0x20,0x20,0x63,0x6f,0x6e,0x73,0x74,0x20,0x69,0x6e,0x74,0x20,0x62,0x61,0x74,0x63,0x68,0x5f,0x69,0x64,0x78,0x20,0x3d,0x20,0x67,0x65,0x74,0x5f,0x67,0x6c,0x6f,0x62,0x61,0x6c,0x5f,0x69,0x64,0x28,0x31,0x29,0x3b,0xa,0x20,0x20,0x20,0x20,0x63,0x6f,0x6e,0x73,0x74,0x20,0x69,0x6e,0x74,0x20,0x77,0x69,0x64,0x74,0x68,0x5f,0x69,0x64,0x78,0x20,0x3d,0x20,0x67,0x65,0x74,0x5f,0x67,0x6c,0x6f,0x62,0x61,0x6c,0x5f,0x69,0x64,0x28,0x32,0x29,0x3b,0xa,0x20,0x20,0x20,0x20,0xa,0x20,0x20,0x20,0x20,0x63,0x6f,0x6e,0x73,0x74,0x20,0x69,0x6e,0x74,0x20,0x69,0x64,0x78,0x20,0x3d,0x20,0x67,0x65,0x74,0x5f,0x6c,0x6f,0x63,0x61,0x6c,0x5f,0x69,0x64,0x28,0x30,0x29,0x3b,0xa,0x20,0x20,0x20,0x20,0x46,0x4c,0x4f,0x41,0x54,0x20,0x6c,0x6f,0x63,0x61,0x6c,0x20,0x73,0x75,0x6d,0x5b,0x32,0x35,0x36,0x5d,0x3b,0xa,0x20,0x20,0x20,0x20,0x46,0x4c,0x4f,0x41,0x54,0x34,0x20,0x6f,0x75,0x74,0x20,0x3d,0x20,0x28,0x46,0x4c,0x4f,0x41,0x54,0x34,0x29,0x30,0x2e,0x30,0x3b,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0xa,0x20,0x20,0x20,0x20,0x63,0x6f,0x6e,0x73,0x74,0x20,0x69,0x6e,0x74,0x20,0x72,0x65,0x64,0x75,0x63,0x65,0x5f,0x6e,0x75,0x6d,0x20,0x3d,0x20,0x67,0x65,0x74,0x5f,0x6c,0x6f,0x63,0x61,0x6c,0x5f,0x73,0x69,0x7a,0x65,0x28,0x30,0x29,0x3b,0xa,0xa,0x20,0x20,0x20,0x20,0x66,0x6f,0x72,0x20,0x28,0x69,0x6e,0x74,0x20,0x68,0x20,0x3d,0x20,0x69,0x64,0x78,0x3b,0x20,0x68,0x20,0x3c,0x20,0x68,0x65,0x69,0x67,0x68,0x74,0x3b,0x20,0x68,0x2b,0x3d,0x72,0x65,0x64,0x75,0x63,0x65,0x5f,0x6e,0x75,0x6d,0x29,0x20,0x7b,0xa,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x46,0x4c,0x4f,0x41,0x54,0x34,0x20,0x69,0x6e,0x20,0x3d,0x20,0x52,0x49,0x5f,0x46,0x28,0x69,0x6e,0x70,0x75,0x74,0x2c,0x20,0x53,0x41,0x4d,0x50,0x4c,0x45,0x52,0x2c,0x20,0x28,0x69,0x6e,0x74,0x32,0x29,0x28,0x77,0x69,0x64,0x74,0x68,0x5f,0x69,0x64,0x78,0x2c,0x20,0x62,0x61,0x74,0x63,0x68,0x5f,0x69,0x64,0x78,0x2a,0x68,0x65,0x69,0x67,0x68,0x74,0x2b,0x68,0x29,0x29,0x3b,0xa,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x6f,0x75,0x74,0x20,0x3d,0x20,0x6f,0x75,0x74,0x20,0x2b,0x20,0x69,0x6e,0x3b,0xa,0x20,0x20,0x20,0x20,0x7d,0xa,0x20,0x20,0x20,0x20,0x46,0x4c,0x4f,0x41,0x54,0x2a,0x20,0x6f,0x75,0x74,0x5f,0x70,0x74,0x72,0x20,0x3d,0x20,0x28,0x46,0x4c,0x4f,0x41,0x54,0x2a,0x29,0x26,0x6f,0x75,0x74,0x3b,0xa,0x20,0x20,0x20,0x20,0x66,0x6f,0x72,0x28,0x69,0x6e,0x74,0x20,0x69,0x20,0x3d,0x20,0x31,0x3b,0x20,0x69,0x20,0x3c,0x20,0x63,0x68,0x61,0x6e,0x6e,0x65,0x6c,0x3b,0x20,0x2b,0x2b,0x69,0x29,0x7b,0xa,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x6f,0x75,0x74,0x2e,0x78,0x20,0x2b,0x3d,0x20,0x6f,0x75,0x74,0x5f,0x70,0x74,0x72,0x5b,0x69,0x5d,0x3b,0xa,0x20,0x20,0x20,0x20,0x7d,0xa,0x20,0x20,0x20,0x20,0x73,0x75,0x6d,0x5b,0x69,0x64,0x78,0x5d,0x20,0x3d,0x20,0x6f,0x75,0x74,0x2e,0x78,0x3b,0xa,0xa,0x20,0x20,0x20,0x20,0x62,0x61,0x72,0x72,0x69,0x65,0x72,0x28,0x43,0x4c,0x4b,0x5f,0x4c,0x4f,0x43,0x41,0x4c,0x5f,0x4d,0x45,0x4d,0x5f,0x46,0x45,0x4e,0x43,0x45,0x29,0x3b,0xa,0x20,0x20,0x20,0x20,0x66,0x6f,0x72,0x28,0x69,0x6e,0x74,0x20,0x69,0x20,0x3d,0x20,0x72,0x65,0x64,0x75,0x63,0x65,0x5f,0x6e,0x75,0x6d,0x2f,0x32,0x3b,0x20,0x69,0x20,0x3e,0x20,0x30,0x3b,0x20,0x69,0x20,0x2f,0x3d,0x20,0x32,0x29,0x7b,0xa,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x69,0x66,0x20,0x28,0x69,0x64,0x78,0x20,0x3c,0x20,0x69,0x29,0xa,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x73,0x75,0x6d,0x5b,0x69,0x64,0x78,0x5d,0x20,0x3d,0x20,0x73,0x75,0x6d,0x5b,0x69,0x64,0x78,0x5d,0x20,0x2b,0x20,0x73,0x75,0x6d,0x5b,0x69,0x64,0x78,0x20,0x2b,0x20,0x69,0x5d,0x3b,0xa,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x62,0x61,0x72,0x72,0x69,0x65,0x72,0x28,0x43,0x4c,0x4b,0x5f,0x4c,0x4f,0x43,0x41,0x4c,0x5f,0x4d,0x45,0x4d,0x5f,0x46,0x45,0x4e,0x43,0x45,0x29,0x3b,0xa,0x20,0x20,0x20,0x20,0x7d,0xa,0x20,0x20,0x20,0x20,0x69,0x66,0x20,0x28,0x69,0x64,0x78,0x20,0x3d,0x3d,0x20,0x30,0x29,0x20,0x7b,0xa,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0xa,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x57,0x49,0x5f,0x46,0x28,0x6f,0x75,0x74,0x70,0x75,0x74,0x2c,0x20,0x28,0x69,0x6e,0x74,0x32,0x29,0x28,0x77,0x69,0x64,0x74,0x68,0x5f,0x69,0x64,0x78,0x2c,0x20,0x62,0x61,0x74,0x63,0x68,0x5f,0x69,0x64,0x78,0x29,0x2c,0x20,0x28,0x46,0x4c,0x4f,0x41,0x54,0x34,0x29,0x28,0x73,0x75,0x6d,0x5b,0x30,0x5d,0x2f,0x28,0x68,0x65,0x69,0x67,0x68,0x74,0x2a,0x63,0x68,0x61,0x6e,0x6e,0x65,0x6c,0x29,0x2c,0x20,0x30,0x2e,0x30,0x2c,0x20,0x30,0x2e,0x30,0x2c,0x20,0x30,0x2e,0x30,0x29,0x29,0x3b,0xa,0x20,0x20,0x20,0x20,0x7d,0xa,0x7d,0xa,0x5f,0x5f,0x6b,0x65,0x72,0x6e,0x65,0x6c,0x20,0x76,0x6f,0x69,0x64,0x20,0x72,0x65,0x64,0x75,0x63,0x74,0x5f,0x67,0x65,0x6e,0x65,0x72,0x61,0x6c,0x5f,0x73,0x75,0x6d,0x5f,0x6c,0x6f,0x63,0x61,0x6c,0x28,0x47,0x4c,0x4f,0x42,0x41,0x4c,0x5f,0x53,0x49,0x5a,0x45,0x5f,0x32,0x5f,0x44,0x49,0x4d,0x53,0xa,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x5f,0x5f,0x72,0x65,0x61,0x64,0x5f,0x6f,0x6e,0x6c,0x79,0x20,0x69,0x6d,0x61,0x67,0x65,0x32,0x64,0x5f,0x74,0x20,0x69,0x6e,0x70,0x75,0x74,0x2c,0xa,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x5f,0x5f,0x77,0x72,0x69,0x74,0x65,0x5f,0x6f,0x6e,0x6c,0x79,0x20,0x69,0x6d,0x61,0x67,0x65,0x32,0x64,0x5f,0x74,0x20,0x6f,0x75,0x74,0x70,0x75,0x74,0x2c,0xa,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x5f,0x5f,0x70,0x72,0x69,0x76,0x61,0x74,0x65,0x20,0x63,0x6f,0x6e,0x73,0x74,0x20,0x69,0x6e,0x74,0x20,0x62,0x61,0x74,0x63,0x68,0x2c,0xa,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x5f,0x5f,0x70,0x72,0x69,0x76,0x61,0x74,0x65,0x20,0x63,0x6f,0x6e,0x73,0x74,0x20,0x69,0x6e,0x74,0x20,0x68,0x65,0x69,0x67,0x68,0x74,0x2c,0xa,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x5f,0x5f,0x70,0x72,0x69,0x76,0x61,0x74,0x65,0x20,0x63,0x6f,0x6e,0x73,0x74,0x20,0x69,0x6e,0x74,0x20,0x77,0x69,0x64,0x74,0x68,0x2c,0xa,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x5f,0x5f,0x70,0x72,0x69,0x76,0x61,0x74,0x65,0x20,0x63,0x6f,0x6e,0x73,0x74,0x20,0x69,0x6e,0x74,0x20,0x63,0x68,0x61,0x6e,0x6e,0x65,0x6c,0xa,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x29,0x20,0x7b,0xa,0x20,0x20,0x20,0x20,0x63,0x6f,0x6e,0x73,0x74,0x20,0x69,0x6e,0x74,0x20,0x62,0x61,0x74,0x63,0x68,0x5f,0x69,0x64,0x78,0x20,0x3d,0x20,0x67,0x65,0x74,0x5f,0x67,0x6c,0x6f,0x62,0x61,0x6c,0x5f,0x69,0x64,0x28,0x31,0x29,0x3b,0xa,0x20,0x20,0x20,0x20,0x63,0x6f,0x6e,0x73,0x74,0x20,0x69,0x6e,0x74,0x20,0x77,0x69,0x64,0x74,0x68,0x5f,0x69,0x64,0x78,0x20,0x3d,0x20,0x67,0x65,0x74,0x5f,0x67,0x6c,0x6f,0x62,0x61,0x6c,0x5f,0x69,0x64,0x28,0x32,0x29,0x3b,0xa,0xa,0x20,0x20,0x20,0x20,0x63,0x6f,0x6e,0x73,0x74,0x20,0x69,0x6e,0x74,0x20,0x69,0x64,0x78,0x20,0x3d,0x20,0x67,0x65,0x74,0x5f,0x6c,0x6f,0x63,0x61,0x6c,0x5f,0x69,0x64,0x28,0x30,0x29,0x3b,0xa,0x20,0x20,0x20,0x20,0x46,0x4c,0x4f,0x41,0x54,0x20,0x6c,0x6f,0x63,0x61,0x6c,0x20,0x73,0x75,0x6d,0x5b,0x32,0x35,0x36,0x5d,0x3b,0xa,0x20,0x20,0x20,0x20,0x46,0x4c,0x4f,0x41,0x54,0x34,0x20,0x6f,0x75,0x74,0x20,0x3d,0x20,0x28,0x46,0x4c,0x4f,0x41,0x54,0x34,0x29,0x30,0x2e,0x30,0x3b,0x20,0x20,0x20,0xa,0x20,0x20,0x20,0x20,0x63,0x6f,0x6e,0x73,0x74,0x20,0x69,0x6e,0x74,0x20,0x72,0x65,0x64,0x75,0x63,0x65,0x5f,0x6e,0x75,0x6d,0x20,0x3d,0x20,0x67,0x65,0x74,0x5f,0x6c,0x6f,0x63,0x61,0x6c,0x5f,0x73,0x69,0x7a,0x65,0x28,0x30,0x29,0x3b,0xa,0xa,0x20,0x20,0x20,0x20,0x66,0x6f,0x72,0x20,0x28,0x69,0x6e,0x74,0x20,0x68,0x20,0x3d,0x20,0x69,0x64,0x78,0x3b,0x20,0x68,0x20,0x3c,0x20,0x68,0x65,0x69,0x67,0x68,0x74,0x3b,0x20,0x68,0x2b,0x3d,0x72,0x65,0x64,0x75,0x63,0x65,0x5f,0x6e,0x75,0x6d,0x29,0x20,0x7b,0xa,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x46,0x4c,0x4f,0x41,0x54,0x34,0x20,0x69,0x6e,0x20,0x3d,0x20,0x52,0x49,0x5f,0x46,0x28,0x69,0x6e,0x70,0x75,0x74,0x2c,0x20,0x53,0x41,0x4d,0x50,0x4c,0x45,0x52,0x2c,0x20,0x28,0x69,0x6e,0x74,0x32,0x29,0x28,0x77,0x69,0x64,0x74,0x68,0x5f,0x69,0x64,0x78,0x2c,0x20,0x62,0x61,0x74,0x63,0x68,0x5f,0x69,0x64,0x78,0x2a,0x68,0x65,0x69,0x67,0x68,0x74,0x2b,0x68,0x29,0x29,0x3b,0xa,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x6f,0x75,0x74,0x20,0x3d,0x20,0x6f,0x75,0x74,0x20,0x2b,0x20,0x69,0x6e,0x3b,0xa,0x20,0x20,0x20,0x20,0x7d,0xa,0x20,0x20,0x20,0x20,0x46,0x4c,0x4f,0x41,0x54,0x2a,0x20,0x6f,0x75,0x74,0x5f,0x70,0x74,0x72,0x20,0x3d,0x20,0x28,0x46,0x4c,0x4f,0x41,0x54,0x2a,0x29,0x26,0x6f,0x75,0x74,0x3b,0xa,0x20,0x20,0x20,0x20,0x66,0x6f,0x72,0x28,0x69,0x6e,0x74,0x20,0x69,0x20,0x3d,0x20,0x31,0x3b,0x20,0x69,0x20,0x3c,0x20,0x63,0x68,0x61,0x6e,0x6e,0x65,0x6c,0x3b,0x20,0x2b,0x2b,0x69,0x29,0x7b,0xa,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x6f,0x75,0x74,0x2e,0x78,0x20,0x2b,0x3d,0x20,0x6f,0x75,0x74,0x5f,0x70,0x74,0x72,0x5b,0x69,0x5d,0x3b,0xa,0x20,0x20,0x20,0x20,0x7d,0xa,0x20,0x20,0x20,0x20,0x73,0x75,0x6d,0x5b,0x69,0x64,0x78,0x5d,0x20,0x3d,0x20,0x6f,0x75,0x74,0x2e,0x78,0x3b,0xa,0xa,0x20,0x20,0x20,0x20,0x62,0x61,0x72,0x72,0x69,0x65,0x72,0x28,0x43,0x4c,0x4b,0x5f,0x4c,0x4f,0x43,0x41,0x4c,0x5f,0x4d,0x45,0x4d,0x5f,0x46,0x45,0x4e,0x43,0x45,0x29,0x3b,0xa,0x20,0x20,0x20,0x20,0x66,0x6f,0x72,0x28,0x69,0x6e,0x74,0x20,0x69,0x20,0x3d,0x20,0x72,0x65,0x64,0x75,0x63,0x65,0x5f,0x6e,0x75,0x6d,0x2f,0x32,0x3b,0x20,0x69,0x20,0x3e,0x20,0x30,0x3b,0x20,0x69,0x20,0x2f,0x3d,0x20,0x32,0x29,0x7b,0xa,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x69,0x66,0x20,0x28,0x69,0x64,0x78,0x20,0x3c,0x20,0x69,0x29,0xa,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x73,0x75,0x6d,0x5b,0x69,0x64,0x78,0x5d,0x20,0x3d,0x20,0x73,0x75,0x6d,0x5b,0x69,0x64,0x78,0x5d,0x20,0x2b,0x20,0x73,0x75,0x6d,0x5b,0x69,0x64,0x78,0x20,0x2b,0x20,0x69,0x5d,0x3b,0xa,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x62,0x61,0x72,0x72,0x69,0x65,0x72,0x28,0x43,0x4c,0x4b,0x5f,0x4c,0x4f,0x43,0x41,0x4c,0x5f,0x4d,0x45,0x4d,0x5f,0x46,0x45,0x4e,0x43,0x45,0x29,0x3b,0xa,0x20,0x20,0x20,0x20,0x7d,0xa,0x20,0x20,0x20,0x20,0x69,0x66,0x20,0x28,0x69,0x64,0x78,0x20,0x3d,0x3d,0x20,0x30,0x29,0x20,0x7b,0xa,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x57,0x49,0x5f,0x46,0x28,0x6f,0x75,0x74,0x70,0x75,0x74,0x2c,0x20,0x28,0x69,0x6e,0x74,0x32,0x29,0x28,0x77,0x69,0x64,0x74,0x68,0x5f,0x69,0x64,0x78,0x2c,0x20,0x62,0x61,0x74,0x63,0x68,0x5f,0x69,0x64,0x78,0x29,0x2c,0x20,0x28,0x46,0x4c,0x4f,0x41,0x54,0x34,0x29,0x28,0x73,0x75,0x6d,0x5b,0x30,0x5d,0x2c,0x20,0x30,0x2e,0x30,0x2c,0x20,0x30,0x2e,0x30,0x2c,0x20,0x30,0x2e,0x30,0x29,0x29,0x3b,0xa,0x20,0x20,0x20,0x20,0x7d,0xa,0x7d,0xa,0xa,0x5f,0x5f,0x6b,0x65,0x72,0x6e,0x65,0x6c,0x20,0x76,0x6f,0x69,0x64,0x20,0x72,0x65,0x64,0x75,0x63,0x74,0x5f,0x67,0x65,0x6e,0x65,0x72,0x61,0x6c,0x5f,0x6d,0x61,0x78,0x5f,0x6c,0x6f,0x63,0x61,0x6c,0x28,0x47,0x4c,0x4f,0x42,0x41,0x4c,0x5f,0x53,0x49,0x5a,0x45,0x5f,0x32,0x5f,0x44,0x49,0x4d,0x53,0xa,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x5f,0x5f,0x72,0x65,0x61,0x64,0x5f,0x6f,0x6e,0x6c,0x79,0x20,0x69,0x6d,0x61,0x67,0x65,0x32,0x64,0x5f,0x74,0x20,0x69,0x6e,0x70,0x75,0x74,0x2c,0xa,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x5f,0x5f,0x77,0x72,0x69,0x74,0x65,0x5f,0x6f,0x6e,0x6c,0x79,0x20,0x69,0x6d,0x61,0x67,0x65,0x32,0x64,0x5f,0x74,0x20,0x6f,0x75,0x74,0x70,0x75,0x74,0x2c,0xa,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x5f,0x5f,0x70,0x72,0x69,0x76,0x61,0x74,0x65,0x20,0x63,0x6f,0x6e,0x73,0x74,0x20,0x69,0x6e,0x74,0x20,0x62,0x61,0x74,0x63,0x68,0x2c,0xa,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x5f,0x5f,0x70,0x72,0x69,0x76,0x61,0x74,0x65,0x20,0x63,0x6f,0x6e,0x73,0x74,0x20,0x69,0x6e,0x74,0x20,0x68,0x65,0x69,0x67,0x68,0x74,0x2c,0xa,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x5f,0x5f,0x70,0x72,0x69,0x76,0x61,0x74,0x65,0x20,0x63,0x6f,0x6e,0x73,0x74,0x20,0x69,0x6e,0x74,0x20,0x77,0x69,0x64,0x74,0x68,0x2c,0xa,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x5f,0x5f,0x70,0x72,0x69,0x76,0x61,0x74,0x65,0x20,0x63,0x6f,0x6e,0x73,0x74,0x20,0x69,0x6e,0x74,0x20,0x63,0x68,0x61,0x6e,0x6e,0x65,0x6c,0xa,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x29,0x20,0x7b,0xa,0x20,0x20,0x20,0x20,0x63,0x6f,0x6e,0x73,0x74,0x20,0x69,0x6e,0x74,0x20,0x62,0x61,0x74,0x63,0x68,0x5f,0x69,0x64,0x78,0x20,0x3d,0x20,0x67,0x65,0x74,0x5f,0x67,0x6c,0x6f,0x62,0x61,0x6c,0x5f,0x69,0x64,0x28,0x31,0x29,0x3b,0xa,0x20,0x20,0x20,0x20,0x63,0x6f,0x6e,0x73,0x74,0x20,0x69,0x6e,0x74,0x20,0x77,0x69,0x64,0x74,0x68,0x5f,0x69,0x64,0x78,0x20,0x3d,0x20,0x67,0x65,0x74,0x5f,0x67,0x6c,0x6f,0x62,0x61,0x6c,0x5f,0x69,0x64,0x28,0x32,0x29,0x3b,0xa,0xa,0x20,0x20,0x20,0x20,0x63,0x6f,0x6e,0x73,0x74,0x20,0x69,0x6e,0x74,0x20,0x69,0x64,0x78,0x20,0x3d,0x20,0x67,0x65,0x74,0x5f,0x6c,0x6f,0x63,0x61,0x6c,0x5f,0x69,0x64,0x28,0x30,0x29,0x3b,0xa,0x20,0x20,0x20,0x20,0x46,0x4c,0x4f,0x41,0x54,0x20,0x6c,0x6f,0x63,0x61,0x6c,0x20,0x73,0x75,0x6d,0x5b,0x32,0x35,0x36,0x5d,0x3b,0xa,0x20,0x20,0x20,0x20,0x46,0x4c,0x4f,0x41,0x54,0x34,0x20,0x6f,0x75,0x74,0x20,0x3d,0x20,0x28,0x46,0x4c,0x4f,0x41,0x54,0x34,0x29,0x28,0x2d,0x4d,0x41,0x58,0x46,0x4c,0x4f,0x41,0x54,0x29,0x3b,0x20,0x20,0x20,0xa,0x20,0x20,0x20,0x20,0x63,0x6f,0x6e,0x73,0x74,0x20,0x69,0x6e,0x74,0x20,0x72,0x65,0x64,0x75,0x63,0x65,0x5f,0x6e,0x75,0x6d,0x20,0x3d,0x20,0x67,0x65,0x74,0x5f,0x6c,0x6f,0x63,0x61,0x6c,0x5f,0x73,0x69,0x7a,0x65,0x28,0x30,0x29,0x3b,0xa,0xa,0x20,0x20,0x20,0x20,0x66,0x6f,0x72,0x20,0x28,0x69,0x6e,0x74,0x20,0x68,0x20,0x3d,0x20,0x69,0x64,0x78,0x3b,0x20,0x68,0x20,0x3c,0x20,0x68,0x65,0x69,0x67,0x68,0x74,0x3b,0x20,0x68,0x2b,0x3d,0x72,0x65,0x64,0x75,0x63,0x65,0x5f,0x6e,0x75,0x6d,0x29,0x20,0x7b,0xa,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x46,0x4c,0x4f,0x41,0x54,0x34,0x20,0x69,0x6e,0x20,0x3d,0x20,0x52,0x49,0x5f,0x46,0x28,0x69,0x6e,0x70,0x75,0x74,0x2c,0x20,0x53,0x41,0x4d,0x50,0x4c,0x45,0x52,0x2c,0x20,0x28,0x69,0x6e,0x74,0x32,0x29,0x28,0x77,0x69,0x64,0x74,0x68,0x5f,0x69,0x64,0x78,0x2c,0x20,0x62,0x61,0x74,0x63,0x68,0x5f,0x69,0x64,0x78,0x2a,0x68,0x65,0x69,0x67,0x68,0x74,0x2b,0x68,0x29,0x29,0x3b,0xa,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x6f,0x75,0x74,0x20,0x3d,0x20,0x6d,0x61,0x78,0x28,0x6f,0x75,0x74,0x2c,0x20,0x69,0x6e,0x29,0x3b,0xa,0x20,0x20,0x20,0x20,0x7d,0x20,0x20,0x20,0x20,0xa,0x20,0x20,0x20,0x20,0x46,0x4c,0x4f,0x41,0x54,0x2a,0x20,0x6f,0x75,0x74,0x5f,0x70,0x74,0x72,0x20,0x3d,0x20,0x28,0x46,0x4c,0x4f,0x41,0x54,0x2a,0x29,0x26,0x6f,0x75,0x74,0x3b,0xa,0x20,0x20,0x20,0x20,0x66,0x6f,0x72,0x28,0x69,0x6e,0x74,0x20,0x69,0x20,0x3d,0x20,0x31,0x3b,0x20,0x69,0x20,0x3c,0x20,0x63,0x68,0x61,0x6e,0x6e,0x65,0x6c,0x3b,0x20,0x2b,0x2b,0x69,0x29,0x7b,0xa,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x6f,0x75,0x74,0x2e,0x78,0x20,0x3d,0x20,0x6d,0x61,0x78,0x28,0x6f,0x75,0x74,0x2e,0x78,0x2c,0x20,0x6f,0x75,0x74,0x5f,0x70,0x74,0x72,0x5b,0x69,0x5d,0x29,0x3b,0xa,0x20,0x20,0x20,0x20,0x7d,0xa,0x20,0x20,0x20,0x20,0x73,0x75,0x6d,0x5b,0x69,0x64,0x78,0x5d,0x20,0x3d,0x20,0x6f,0x75,0x74,0x2e,0x78,0x3b,0xa,0x20,0x20,0x20,0x20,0xa,0x20,0x20,0x20,0x20,0x62,0x61,0x72,0x72,0x69,0x65,0x72,0x28,0x43,0x4c,0x4b,0x5f,0x4c,0x4f,0x43,0x41,0x4c,0x5f,0x4d,0x45,0x4d,0x5f,0x46,0x45,0x4e,0x43,0x45,0x29,0x3b,0xa,0x20,0x20,0x20,0x20,0x66,0x6f,0x72,0x28,0x69,0x6e,0x74,0x20,0x69,0x20,0x3d,0x20,0x72,0x65,0x64,0x75,0x63,0x65,0x5f,0x6e,0x75,0x6d,0x2f,0x32,0x3b,0x20,0x69,0x20,0x3e,0x20,0x30,0x3b,0x20,0x69,0x20,0x2f,0x3d,0x20,0x32,0x29,0x7b,0xa,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x69,0x66,0x20,0x28,0x69,0x64,0x78,0x20,0x3c,0x20,0x69,0x29,0xa,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x73,0x75,0x6d,0x5b,0x69,0x64,0x78,0x5d,0x20,0x3d,0x20,0x6d,0x61,0x78,0x28,0x73,0x75,0x6d,0x5b,0x69,0x64,0x78,0x5d,0x2c,0x20,0x73,0x75,0x6d,0x5b,0x69,0x64,0x78,0x20,0x2b,0x20,0x69,0x5d,0x29,0x3b,0xa,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x62,0x61,0x72,0x72,0x69,0x65,0x72,0x28,0x43,0x4c,0x4b,0x5f,0x4c,0x4f,0x43,0x41,0x4c,0x5f,0x4d,0x45,0x4d,0x5f,0x46,0x45,0x4e,0x43,0x45,0x29,0x3b,0xa,0x20,0x20,0x20,0x20,0x7d,0xa,0x20,0x20,0x20,0x20,0x69,0x66,0x20,0x28,0x69,0x64,0x78,0x20,0x3d,0x3d,0x20,0x30,0x29,0x20,0x7b,0xa,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x57,0x49,0x5f,0x46,0x28,0x6f,0x75,0x74,0x70,0x75,0x74,0x2c,0x20,0x28,0x69,0x6e,0x74,0x32,0x29,0x28,0x77,0x69,0x64,0x74,0x68,0x5f,0x69,0x64,0x78,0x2c,0x20,0x62,0x61,0x74,0x63,0x68,0x5f,0x69,0x64,0x78,0x29,0x2c,0x20,0x28,0x46,0x4c,0x4f,0x41,0x54,0x34,0x29,0x28,0x73,0x75,0x6d,0x5b,0x30,0x5d,0x2c,0x20,0x30,0x2e,0x30,0x2c,0x20,0x30,0x2e,0x30,0x2c,0x20,0x30,0x2e,0x30,0x29,0x29,0x3b,0xa,0x20,0x20,0x20,0x20,0x7d,0xa,0x20,0x20,0x20,0x20,0xa,0x7d,0xa,0xa,0x5f,0x5f,0x6b,0x65,0x72,0x6e,0x65,0x6c,0x20,0x76,0x6f,0x69,0x64,0x20,0x72,0x65,0x64,0x75,0x63,0x74,0x5f,0x67,0x65,0x6e,0x65,0x72,0x61,0x6c,0x5f,0x6d,0x69,0x6e,0x5f,0x6c,0x6f,0x63,0x61,0x6c,0x28,0x47,0x4c,0x4f,0x42,0x41,0x4c,0x5f,0x53,0x49,0x5a,0x45,0x5f,0x32,0x5f,0x44,0x49,0x4d,0x53,0xa,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x5f,0x5f,0x72,0x65,0x61,0x64,0x5f,0x6f,0x6e,0x6c,0x79,0x20,0x69,0x6d,0x61,0x67,0x65,0x32,0x64,0x5f,0x74,0x20,0x69,0x6e,0x70,0x75,0x74,0x2c,0xa,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x5f,0x5f,0x77,0x72,0x69,0x74,0x65,0x5f,0x6f,0x6e,0x6c,0x79,0x20,0x69,0x6d,0x61,0x67,0x65,0x32,0x64,0x5f,0x74,0x20,0x6f,0x75,0x74,0x70,0x75,0x74,0x2c,0xa,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x5f,0x5f,0x70,0x72,0x69,0x76,0x61,0x74,0x65,0x20,0x63,0x6f,0x6e,0x73,0x74,0x20,0x69,0x6e,0x74,0x20,0x62,0x61,0x74,0x63,0x68,0x2c,0xa,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x5f,0x5f,0x70,0x72,0x69,0x76,0x61,0x74,0x65,0x20,0x63,0x6f,0x6e,0x73,0x74,0x20,0x69,0x6e,0x74,0x20,0x68,0x65,0x69,0x67,0x68,0x74,0x2c,0xa,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x5f,0x5f,0x70,0x72,0x69,0x76,0x61,0x74,0x65,0x20,0x63,0x6f,0x6e,0x73,0x74,0x20,0x69,0x6e,0x74,0x20,0x77,0x69,0x64,0x74,0x68,0x2c,0xa,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x5f,0x5f,0x70,0x72,0x69,0x76,0x61,0x74,0x65,0x20,0x63,0x6f,0x6e,0x73,0x74,0x20,0x69,0x6e,0x74,0x20,0x63,0x68,0x61,0x6e,0x6e,0x65,0x6c,0xa,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x29,0x20,0x7b,0xa,0x20,0x20,0x20,0x20,0x63,0x6f,0x6e,0x73,0x74,0x20,0x69,0x6e,0x74,0x20,0x62,0x61,0x74,0x63,0x68,0x5f,0x69,0x64,0x78,0x20,0x3d,0x20,0x67,0x65,0x74,0x5f,0x67,0x6c,0x6f,0x62,0x61,0x6c,0x5f,0x69,0x64,0x28,0x31,0x29,0x3b,0xa,0x20,0x20,0x20,0x20,0x63,0x6f,0x6e,0x73,0x74,0x20,0x69,0x6e,0x74,0x20,0x77,0x69,0x64,0x74,0x68,0x5f,0x69,0x64,0x78,0x20,0x3d,0x20,0x67,0x65,0x74,0x5f,0x67,0x6c,0x6f,0x62,0x61,0x6c,0x5f,0x69,0x64,0x28,0x32,0x29,0x3b,0xa,0x20,0x20,0x20,0x20,0xa,0x20,0x20,0x20,0x20,0x63,0x6f,0x6e,0x73,0x74,0x20,0x69,0x6e,0x74,0x20,0x69,0x64,0x78,0x20,0x3d,0x20,0x67,0x65,0x74,0x5f,0x6c,0x6f,0x63,0x61,0x6c,0x5f,0x69,0x64,0x28,0x30,0x29,0x3b,0xa,0x20,0x20,0x20,0x20,0x46,0x4c,0x4f,0x41,0x54,0x20,0x6c,0x6f,0x63,0x61,0x6c,0x20,0x73,0x75,0x6d,0x5b,0x32,0x35,0x36,0x5d,0x3b,0xa,0x20,0x20,0x20,0x20,0x46,0x4c,0x4f,0x41,0x54,0x34,0x20,0x6f,0x75,0x74,0x20,0x3d,0x20,0x28,0x46,0x4c,0x4f,0x41,0x54,0x34,0x29,0x28,0x4d,0x41,0x58,0x46,0x4c,0x4f,0x41,0x54,0x29,0x3b,0x20,0x20,0x20,0xa,0xa,0x20,0x20,0x20,0x20,0x63,0x6f,0x6e,0x73,0x74,0x20,0x69,0x6e,0x74,0x20,0x72,0x65,0x64,0x75,0x63,0x65,0x5f,0x6e,0x75,0x6d,0x20,0x3d,0x20,0x67,0x65,0x74,0x5f,0x6c,0x6f,0x63,0x61,0x6c,0x5f,0x73,0x69,0x7a,0x65,0x28,0x30,0x29,0x3b,0xa,0xa,0x20,0x20,0x20,0x20,0x66,0x6f,0x72,0x20,0x28,0x69,0x6e,0x74,0x20,0x68,0x20,0x3d,0x20,0x69,0x64,0x78,0x3b,0x20,0x68,0x20,0x3c,0x20,0x68,0x65,0x69,0x67,0x68,0x74,0x3b,0x20,0x68,0x2b,0x3d,0x72,0x65,0x64,0x75,0x63,0x65,0x5f,0x6e,0x75,0x6d,0x29,0x20,0x7b,0xa,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x46,0x4c,0x4f,0x41,0x54,0x34,0x20,0x69,0x6e,0x20,0x3d,0x20,0x52,0x49,0x5f,0x46,0x28,0x69,0x6e,0x70,0x75,0x74,0x2c,0x20,0x53,0x41,0x4d,0x50,0x4c,0x45,0x52,0x2c,0x20,0x28,0x69,0x6e,0x74,0x32,0x29,0x28,0x77,0x69,0x64,0x74,0x68,0x5f,0x69,0x64,0x78,0x2c,0x20,0x62,0x61,0x74,0x63,0x68,0x5f,0x69,0x64,0x78,0x2a,0x68,0x65,0x69,0x67,0x68,0x74,0x2b,0x68,0x29,0x29,0x3b,0xa,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x6f,0x75,0x74,0x20,0x3d,0x20,0x6d,0x69,0x6e,0x28,0x6f,0x75,0x74,0x2c,0x20,0x69,0x6e,0x29,0x3b,0xa,0x20,0x20,0x20,0x20,0x7d,0xa,0x20,0x20,0x20,0x20,0x46,0x4c,0x4f,0x41,0x54,0x2a,0x20,0x6f,0x75,0x74,0x5f,0x70,0x74,0x72,0x20,0x3d,0x20,0x28,0x46,0x4c,0x4f,0x41,0x54,0x2a,0x29,0x26,0x6f,0x75,0x74,0x3b,0xa,0x20,0x20,0x20,0x20,0x66,0x6f,0x72,0x28,0x69,0x6e,0x74,0x20,0x69,0x20,0x3d,0x20,0x31,0x3b,0x20,0x69,0x20,0x3c,0x20,0x63,0x68,0x61,0x6e,0x6e,0x65,0x6c,0x3b,0x20,0x2b,0x2b,0x69,0x29,0x7b,0xa,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x6f,0x75,0x74,0x2e,0x78,0x20,0x3d,0x20,0x6d,0x69,0x6e,0x28,0x6f,0x75,0x74,0x2e,0x78,0x2c,0x20,0x6f,0x75,0x74,0x5f,0x70,0x74,0x72,0x5b,0x69,0x5d,0x29,0x3b,0xa,0x20,0x20,0x20,0x20,0x7d,0xa,0x20,0x20,0x20,0x20,0x73,0x75,0x6d,0x5b,0x69,0x64,0x78,0x5d,0x20,0x3d,0x20,0x6f,0x75,0x74,0x2e,0x78,0x3b,0xa,0xa,0x20,0x20,0x20,0x20,0x62,0x61,0x72,0x72,0x69,0x65,0x72,0x28,0x43,0x4c,0x4b,0x5f,0x4c,0x4f,0x43,0x41,0x4c,0x5f,0x4d,0x45,0x4d,0x5f,0x46,0x45,0x4e,0x43,0x45,0x29,0x3b,0xa,0x20,0x20,0x20,0x20,0x66,0x6f,0x72,0x28,0x69,0x6e,0x74,0x20,0x69,0x20,0x3d,0x20,0x72,0x65,0x64,0x75,0x63,0x65,0x5f,0x6e,0x75,0x6d,0x2f,0x32,0x3b,0x20,0x69,0x20,0x3e,0x20,0x30,0x3b,0x20,0x69,0x20,0x2f,0x3d,0x20,0x32,0x29,0x7b,0xa,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x69,0x66,0x20,0x28,0x69,0x64,0x78,0x20,0x3c,0x20,0x69,0x29,0xa,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x73,0x75,0x6d,0x5b,0x69,0x64,0x78,0x5d,0x20,0x3d,0x20,0x6d,0x69,0x6e,0x28,0x73,0x75,0x6d,0x5b,0x69,0x64,0x78,0x5d,0x2c,0x20,0x73,0x75,0x6d,0x5b,0x69,0x64,0x78,0x20,0x2b,0x20,0x69,0x5d,0x29,0x3b,0xa,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x62,0x61,0x72,0x72,0x69,0x65,0x72,0x28,0x43,0x4c,0x4b,0x5f,0x4c,0x4f,0x43,0x41,0x4c,0x5f,0x4d,0x45,0x4d,0x5f,0x46,0x45,0x4e,0x43,0x45,0x29,0x3b,0xa,0x20,0x20,0x20,0x20,0x7d,0xa,0x20,0x20,0x20,0x20,0x69,0x66,0x20,0x28,0x69,0x64,0x78,0x20,0x3d,0x3d,0x20,0x30,0x29,0x20,0x7b,0xa,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x57,0x49,0x5f,0x46,0x28,0x6f,0x75,0x74,0x70,0x75,0x74,0x2c,0x20,0x28,0x69,0x6e,0x74,0x32,0x29,0x28,0x77,0x69,0x64,0x74,0x68,0x5f,0x69,0x64,0x78,0x2c,0x20,0x62,0x61,0x74,0x63,0x68,0x5f,0x69,0x64,0x78,0x29,0x2c,0x20,0x28,0x46,0x4c,0x4f,0x41,0x54,0x34,0x29,0x28,0x73,0x75,0x6d,0x5b,0x30,0x5d,0x2c,0x20,0x30,0x2e,0x30,0x2c,0x20,0x30,0x2e,0x30,0x2c,0x20,0x30,0x2e,0x30,0x29,0x29,0x3b,0xa,0x20,0x20,0x20,0x20,0x7d,0xa,0x7d,0xa,0xa,0x5f,0x5f,0x6b,0x65,0x72,0x6e,0x65,0x6c,0x20,0x76,0x6f,0x69,0x64,0x20,0x72,0x65,0x64,0x75,0x63,0x74,0x5f,0x67,0x65,0x6e,0x65,0x72,0x61,0x6c,0x5f,0x6d,0x75,0x6c,0x5f,0x6c,0x6f,0x63,0x61,0x6c,0x28,0x47,0x4c,0x4f,0x42,0x41,0x4c,0x5f,0x53,0x49,0x5a,0x45,0x5f,0x32,0x5f,0x44,0x49,0x4d,0x53,0xa,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x5f,0x5f,0x72,0x65,0x61,0x64,0x5f,0x6f,0x6e,0x6c,0x79,0x20,0x69,0x6d,0x61,0x67,0x65,0x32,0x64,0x5f,0x74,0x20,0x69,0x6e,0x70,0x75,0x74,0x2c,0xa,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x5f,0x5f,0x77,0x72,0x69,0x74,0x65,0x5f,0x6f,0x6e,0x6c,0x79,0x20,0x69,0x6d,0x61,0x67,0x65,0x32,0x64,0x5f,0x74,0x20,0x6f,0x75,0x74,0x70,0x75,0x74,0x2c,0xa,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x5f,0x5f,0x70,0x72,0x69,0x76,0x61,0x74,0x65,0x20,0x63,0x6f,0x6e,0x73,0x74,0x20,0x69,0x6e,0x74,0x20,0x62,0x61,0x74,0x63,0x68,0x2c,0xa,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x5f,0x5f,0x70,0x72,0x69,0x76,0x61,0x74,0x65,0x20,0x63,0x6f,0x6e,0x73,0x74,0x20,0x69,0x6e,0x74,0x20,0x68,0x65,0x69,0x67,0x68,0x74,0x2c,0xa,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x5f,0x5f,0x70,0x72,0x69,0x76,0x61,0x74,0x65,0x20,0x63,0x6f,0x6e,0x73,0x74,0x20,0x69,0x6e,0x74,0x20,0x77,0x69,0x64,0x74,0x68,0x2c,0xa,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x5f,0x5f,0x70,0x72,0x69,0x76,0x61,0x74,0x65,0x20,0x63,0x6f,0x6e,0x73,0x74,0x20,0x69,0x6e,0x74,0x20,0x63,0x68,0x61,0x6e,0x6e,0x65,0x6c,0xa,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x29,0x20,0x7b,0xa,0x20,0x20,0x20,0x20,0x63,0x6f,0x6e,0x73,0x74,0x20,0x69,0x6e,0x74,0x20,0x62,0x61,0x74,0x63,0x68,0x5f,0x69,0x64,0x78,0x20,0x3d,0x20,0x67,0x65,0x74,0x5f,0x67,0x6c,0x6f,0x62,0x61,0x6c,0x5f,0x69,0x64,0x28,0x31,0x29,0x3b,0xa,0x20,0x20,0x20,0x20,0x63,0x6f,0x6e,0x73,0x74,0x20,0x69,0x6e,0x74,0x20,0x77,0x69,0x64,0x74,0x68,0x5f,0x69,0x64,0x78,0x20,0x3d,0x20,0x67,0x65,0x74,0x5f,0x67,0x6c,0x6f,0x62,0x61,0x6c,0x5f,0x69,0x64,0x28,0x32,0x29,0x3b,0xa,0xa,0x20,0x20,0x20,0x20,0x63,0x6f,0x6e,0x73,0x74,0x20,0x69,0x6e,0x74,0x20,0x69,0x64,0x78,0x20,0x3d,0x20,0x67,0x65,0x74,0x5f,0x6c,0x6f,0x63,0x61,0x6c,0x5f,0x69,0x64,0x28,0x30,0x29,0x3b,0xa,0x20,0x20,0x20,0x20,0x46,0x4c,0x4f,0x41,0x54,0x20,0x6c,0x6f,0x63,0x61,0x6c,0x20,0x73,0x75,0x6d,0x5b,0x32,0x35,0x36,0x5d,0x3b,0xa,0x20,0x20,0x20,0x20,0x46,0x4c,0x4f,0x41,0x54,0x34,0x20,0x6f,0x75,0x74,0x20,0x3d,0x20,0x28,0x46,0x4c,0x4f,0x41,0x54,0x34,0x29,0x31,0x2e,0x30,0x3b,0x20,0x20,0x20,0xa,0xa,0x20,0x20,0x20,0x20,0x63,0x6f,0x6e,0x73,0x74,0x20,0x69,0x6e,0x74,0x20,0x72,0x65,0x64,0x75,0x63,0x65,0x5f,0x6e,0x75,0x6d,0x20,0x3d,0x20,0x67,0x65,0x74,0x5f,0x6c,0x6f,0x63,0x61,0x6c,0x5f,0x73,0x69,0x7a,0x65,0x28,0x30,0x29,0x3b,0xa,0xa,0x20,0x20,0x20,0x20,0x66,0x6f,0x72,0x20,0x28,0x69,0x6e,0x74,0x20,0x68,0x20,0x3d,0x20,0x69,0x64,0x78,0x3b,0x20,0x68,0x20,0x3c,0x20,0x68,0x65,0x69,0x67,0x68,0x74,0x3b,0x20,0x68,0x2b,0x3d,0x72,0x65,0x64,0x75,0x63,0x65,0x5f,0x6e,0x75,0x6d,0x29,0x20,0x7b,0xa,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x46,0x4c,0x4f,0x41,0x54,0x34,0x20,0x69,0x6e,0x20,0x3d,0x20,0x52,0x49,0x5f,0x46,0x28,0x69,0x6e,0x70,0x75,0x74,0x2c,0x20,0x53,0x41,0x4d,0x50,0x4c,0x45,0x52,0x2c,0x20,0x28,0x69,0x6e,0x74,0x32,0x29,0x28,0x77,0x69,0x64,0x74,0x68,0x5f,0x69,0x64,0x78,0x2c,0x20,0x62,0x61,0x74,0x63,0x68,0x5f,0x69,0x64,0x78,0x2a,0x68,0x65,0x69,0x67,0x68,0x74,0x2b,0x68,0x29,0x29,0x3b,0xa,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x6f,0x75,0x74,0x20,0x3d,0x20,0x6f,0x75,0x74,0x20,0x2a,0x20,0x69,0x6e,0x3b,0xa,0x20,0x20,0x20,0x20,0x7d,0xa,0x20,0x20,0x20,0x20,0x46,0x4c,0x4f,0x41,0x54,0x2a,0x20,0x6f,0x75,0x74,0x5f,0x70,0x74,0x72,0x20,0x3d,0x20,0x28,0x46,0x4c,0x4f,0x41,0x54,0x2a,0x29,0x26,0x6f,0x75,0x74,0x3b,0xa,0x20,0x20,0x20,0x20,0x66,0x6f,0x72,0x28,0x69,0x6e,0x74,0x20,0x69,0x20,0x3d,0x20,0x31,0x3b,0x20,0x69,0x20,0x3c,0x20,0x63,0x68,0x61,0x6e,0x6e,0x65,0x6c,0x3b,0x20,0x2b,0x2b,0x69,0x29,0x7b,0xa,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x6f,0x75,0x74,0x2e,0x78,0x20,0x2a,0x3d,0x20,0x6f,0x75,0x74,0x5f,0x70,0x74,0x72,0x5b,0x69,0x5d,0x3b,0xa,0x20,0x20,0x20,0x20,0x7d,0xa,0x20,0x20,0x20,0x20,0x73,0x75,0x6d,0x5b,0x69,0x64,0x78,0x5d,0x20,0x3d,0x20,0x6f,0x75,0x74,0x2e,0x78,0x3b,0xa,0x20,0x20,0x20,0x20,0xa,0x20,0x20,0x20,0x20,0x62,0x61,0x72,0x72,0x69,0x65,0x72,0x28,0x43,0x4c,0x4b,0x5f,0x4c,0x4f,0x43,0x41,0x4c,0x5f,0x4d,0x45,0x4d,0x5f,0x46,0x45,0x4e,0x43,0x45,0x29,0x3b,0xa,0x20,0x20,0x20,0x20,0x66,0x6f,0x72,0x28,0x69,0x6e,0x74,0x20,0x69,0x20,0x3d,0x20,0x72,0x65,0x64,0x75,0x63,0x65,0x5f,0x6e,0x75,0x6d,0x2f,0x32,0x3b,0x20,0x69,0x20,0x3e,0x20,0x30,0x3b,0x20,0x69,0x20,0x2f,0x3d,0x20,0x32,0x29,0x7b,0xa,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x69,0x66,0x20,0x28,0x69,0x64,0x78,0x20,0x3c,0x20,0x69,0x29,0xa,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x73,0x75,0x6d,0x5b,0x69,0x64,0x78,0x5d,0x20,0x3d,0x20,0x73,0x75,0x6d,0x5b,0x69,0x64,0x78,0x5d,0x20,0x2a,0x20,0x73,0x75,0x6d,0x5b,0x69,0x64,0x78,0x20,0x2b,0x20,0x69,0x5d,0x3b,0xa,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x62,0x61,0x72,0x72,0x69,0x65,0x72,0x28,0x43,0x4c,0x4b,0x5f,0x4c,0x4f,0x43,0x41,0x4c,0x5f,0x4d,0x45,0x4d,0x5f,0x46,0x45,0x4e,0x43,0x45,0x29,0x3b,0xa,0x20,0x20,0x20,0x20,0x7d,0xa,0x20,0x20,0x20,0x20,0x69,0x66,0x20,0x28,0x69,0x64,0x78,0x20,0x3d,0x3d,0x20,0x30,0x29,0x20,0x7b,0xa,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x57,0x49,0x5f,0x46,0x28,0x6f,0x75,0x74,0x70,0x75,0x74,0x2c,0x20,0x28,0x69,0x6e,0x74,0x32,0x29,0x28,0x77,0x69,0x64,0x74,0x68,0x5f,0x69,0x64,0x78,0x2c,0x20,0x62,0x61,0x74,0x63,0x68,0x5f,0x69,0x64,0x78,0x29,0x2c,0x20,0x28,0x46,0x4c,0x4f,0x41,0x54,0x34,0x29,0x28,0x73,0x75,0x6d,0x5b,0x30,0x5d,0x2c,0x20,0x30,0x2e,0x30,0x2c,0x20,0x30,0x2e,0x30,0x2c,0x20,0x30,0x2e,0x30,0x29,0x29,0x3b,0xa,0x20,0x20,0x20,0x20,0x7d,0xa,0x7d,0xa,0xa, } }, #ifndef MNN_OPENCL_BUFFER_CLOSED { "reduction_buf", - { 0x2f,0x2f,0x20,0x54,0x4f,0x44,0x4f,0x3a,0x20,0x75,0x73,0x65,0x20,0x49,0x4e,0x49,0x54,0x5f,0x53,0x43,0x41,0x4c,0x41,0x52,0x5f,0x56,0x41,0x4c,0x55,0x45,0x2c,0x20,0x4f,0x50,0x45,0x52,0x41,0x54,0x4f,0x52,0x2c,0x20,0x46,0x49,0x4e,0x41,0x4c,0x5f,0x4f,0x50,0x45,0x52,0x41,0x54,0x4f,0x52,0x5f,0x4f,0x4e,0x5f,0x43,0x48,0x41,0x4e,0x4e,0x45,0x4c,0x20,0x6d,0x61,0x63,0x72,0x6f,0x20,0x61,0x62,0x73,0x74,0x72,0x61,0x63,0x74,0x20,0x61,0x6e,0x64,0x20,0x73,0x69,0x6d,0x70,0x6c,0x69,0x66,0x79,0x20,0x63,0x6f,0x64,0x65,0xa,0x2f,0x2f,0x20,0x54,0x4f,0x44,0x4f,0x3a,0x20,0x73,0x75,0x70,0x70,0x6f,0x72,0x74,0x20,0x72,0x65,0x64,0x75,0x63,0x65,0x20,0x64,0x69,0x6d,0x73,0x20,0x69,0x6e,0x63,0x6c,0x75,0x64,0x65,0x20,0x62,0x61,0x74,0x63,0x68,0xa,0x2f,0x2f,0x20,0x54,0x4f,0x44,0x4f,0x3a,0x20,0x73,0x75,0x70,0x70,0x6f,0x72,0x74,0x20,0x6b,0x65,0x65,0x70,0x5f,0x64,0x69,0x6d,0x3d,0x46,0x61,0x6c,0x73,0x65,0xa,0x2f,0x2f,0x20,0x54,0x4f,0x44,0x4f,0x3a,0x20,0x66,0x69,0x78,0x20,0x63,0x68,0x61,0x6e,0x6e,0x65,0x6c,0x20,0x72,0x65,0x64,0x75,0x63,0x65,0x20,0x72,0x65,0x73,0x75,0x6c,0x74,0x20,0x72,0x65,0x2d,0x70,0x61,0x63,0x6b,0x20,0x70,0x72,0x6f,0x62,0x6c,0x65,0x6d,0xa,0x23,0x69,0x66,0x64,0x65,0x66,0x20,0x4d,0x4e,0x4e,0x5f,0x53,0x55,0x50,0x50,0x4f,0x52,0x54,0x5f,0x46,0x50,0x31,0x36,0xa,0x23,0x70,0x72,0x61,0x67,0x6d,0x61,0x20,0x4f,0x50,0x45,0x4e,0x43,0x4c,0x20,0x45,0x58,0x54,0x45,0x4e,0x53,0x49,0x4f,0x4e,0x20,0x63,0x6c,0x5f,0x6b,0x68,0x72,0x5f,0x66,0x70,0x31,0x36,0x20,0x3a,0x20,0x65,0x6e,0x61,0x62,0x6c,0x65,0xa,0x23,0x65,0x6e,0x64,0x69,0x66,0xa,0xa,0x23,0x64,0x65,0x66,0x69,0x6e,0x65,0x20,0x47,0x4c,0x4f,0x42,0x41,0x4c,0x5f,0x53,0x49,0x5a,0x45,0x5f,0x32,0x5f,0x44,0x49,0x4d,0x53,0x20,0x5c,0xa,0x5f,0x5f,0x70,0x72,0x69,0x76,0x61,0x74,0x65,0x20,0x63,0x6f,0x6e,0x73,0x74,0x20,0x69,0x6e,0x74,0x20,0x67,0x6c,0x6f,0x62,0x61,0x6c,0x5f,0x73,0x69,0x7a,0x65,0x5f,0x64,0x69,0x6d,0x30,0x2c,0x20,0x5f,0x5f,0x70,0x72,0x69,0x76,0x61,0x74,0x65,0x20,0x63,0x6f,0x6e,0x73,0x74,0x20,0x69,0x6e,0x74,0x20,0x67,0x6c,0x6f,0x62,0x61,0x6c,0x5f,0x73,0x69,0x7a,0x65,0x5f,0x64,0x69,0x6d,0x31,0x2c,0xa,0xa,0x5f,0x5f,0x6b,0x65,0x72,0x6e,0x65,0x6c,0x20,0x76,0x6f,0x69,0x64,0x20,0x72,0x65,0x64,0x75,0x63,0x74,0x5f,0x62,0x75,0x66,0x28,0x47,0x4c,0x4f,0x42,0x41,0x4c,0x5f,0x53,0x49,0x5a,0x45,0x5f,0x32,0x5f,0x44,0x49,0x4d,0x53,0xa,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x5f,0x5f,0x67,0x6c,0x6f,0x62,0x61,0x6c,0x20,0x63,0x6f,0x6e,0x73,0x74,0x20,0x46,0x4c,0x4f,0x41,0x54,0x2a,0x20,0x69,0x6e,0x70,0x75,0x74,0x2c,0xa,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x5f,0x5f,0x67,0x6c,0x6f,0x62,0x61,0x6c,0x20,0x46,0x4c,0x4f,0x41,0x54,0x2a,0x20,0x6f,0x75,0x74,0x70,0x75,0x74,0x2c,0xa,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x5f,0x5f,0x70,0x72,0x69,0x76,0x61,0x74,0x65,0x20,0x63,0x6f,0x6e,0x73,0x74,0x20,0x69,0x6e,0x74,0x20,0x62,0x61,0x74,0x63,0x68,0x2c,0xa,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x5f,0x5f,0x70,0x72,0x69,0x76,0x61,0x74,0x65,0x20,0x63,0x6f,0x6e,0x73,0x74,0x20,0x69,0x6e,0x74,0x20,0x68,0x65,0x69,0x67,0x68,0x74,0x2c,0xa,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x5f,0x5f,0x70,0x72,0x69,0x76,0x61,0x74,0x65,0x20,0x63,0x6f,0x6e,0x73,0x74,0x20,0x69,0x6e,0x74,0x20,0x77,0x69,0x64,0x74,0x68,0xa,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x29,0x20,0x7b,0xa,0x20,0x20,0x20,0x20,0x63,0x6f,0x6e,0x73,0x74,0x20,0x69,0x6e,0x74,0x20,0x62,0x61,0x74,0x63,0x68,0x5f,0x69,0x64,0x78,0x20,0x3d,0x20,0x67,0x65,0x74,0x5f,0x67,0x6c,0x6f,0x62,0x61,0x6c,0x5f,0x69,0x64,0x28,0x30,0x29,0x3b,0xa,0x20,0x20,0x20,0x20,0x63,0x6f,0x6e,0x73,0x74,0x20,0x69,0x6e,0x74,0x20,0x77,0x69,0x64,0x74,0x68,0x5f,0x69,0x64,0x78,0x20,0x3d,0x20,0x67,0x65,0x74,0x5f,0x67,0x6c,0x6f,0x62,0x61,0x6c,0x5f,0x69,0x64,0x28,0x31,0x29,0x3b,0xa,0xa,0x20,0x20,0x20,0x20,0x63,0x6f,0x6e,0x73,0x74,0x20,0x69,0x6e,0x74,0x20,0x69,0x6e,0x70,0x5f,0x6f,0x66,0x66,0x73,0x65,0x74,0x20,0x3d,0x20,0x28,0x28,0x62,0x61,0x74,0x63,0x68,0x5f,0x69,0x64,0x78,0x20,0x2a,0x20,0x68,0x65,0x69,0x67,0x68,0x74,0x20,0x2b,0x20,0x30,0x29,0x20,0x2a,0x20,0x77,0x69,0x64,0x74,0x68,0x20,0x2b,0x20,0x77,0x69,0x64,0x74,0x68,0x5f,0x69,0x64,0x78,0x29,0x2a,0x34,0x3b,0xa,0x20,0x20,0x20,0x20,0x46,0x4c,0x4f,0x41,0x54,0x20,0x6e,0x75,0x6d,0x20,0x3d,0x20,0x69,0x6e,0x70,0x75,0x74,0x5b,0x69,0x6e,0x70,0x5f,0x6f,0x66,0x66,0x73,0x65,0x74,0x5d,0x3b,0xa,0x20,0x20,0x20,0x20,0x66,0x6f,0x72,0x20,0x28,0x69,0x6e,0x74,0x20,0x68,0x20,0x3d,0x20,0x31,0x3b,0x20,0x68,0x20,0x3c,0x20,0x68,0x65,0x69,0x67,0x68,0x74,0x3b,0x20,0x68,0x2b,0x2b,0x29,0x20,0x7b,0xa,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x46,0x4c,0x4f,0x41,0x54,0x20,0x69,0x6e,0x20,0x3d,0x20,0x69,0x6e,0x70,0x75,0x74,0x5b,0x69,0x6e,0x70,0x5f,0x6f,0x66,0x66,0x73,0x65,0x74,0x20,0x2b,0x20,0x68,0x2a,0x77,0x69,0x64,0x74,0x68,0x2a,0x34,0x5d,0x3b,0xa,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x6e,0x75,0x6d,0x20,0x3d,0x20,0x4f,0x50,0x45,0x52,0x41,0x54,0x45,0x3b,0xa,0x20,0x20,0x20,0x20,0x7d,0xa,0x20,0x20,0x20,0x20,0xa,0x20,0x20,0x20,0x20,0x23,0x69,0x66,0x64,0x65,0x66,0x20,0x47,0x45,0x54,0x5f,0x41,0x56,0x47,0xa,0x20,0x20,0x20,0x20,0x6e,0x75,0x6d,0x20,0x3d,0x20,0x6e,0x75,0x6d,0x20,0x2f,0x20,0x68,0x65,0x69,0x67,0x68,0x74,0x3b,0xa,0x20,0x20,0x20,0x20,0x23,0x65,0x6e,0x64,0x69,0x66,0xa,0x20,0x20,0x20,0x20,0x63,0x6f,0x6e,0x73,0x74,0x20,0x69,0x6e,0x74,0x20,0x6f,0x75,0x74,0x5f,0x6f,0x66,0x66,0x73,0x65,0x74,0x20,0x3d,0x20,0x62,0x61,0x74,0x63,0x68,0x5f,0x69,0x64,0x78,0x20,0x2a,0x20,0x77,0x69,0x64,0x74,0x68,0x20,0x2b,0x20,0x77,0x69,0x64,0x74,0x68,0x5f,0x69,0x64,0x78,0x3b,0xa,0x20,0x20,0x20,0x20,0x76,0x73,0x74,0x6f,0x72,0x65,0x34,0x28,0x28,0x46,0x4c,0x4f,0x41,0x54,0x34,0x29,0x28,0x6e,0x75,0x6d,0x2c,0x20,0x30,0x2e,0x30,0x2c,0x20,0x30,0x2e,0x30,0x2c,0x20,0x30,0x2e,0x30,0x29,0x2c,0x20,0x6f,0x75,0x74,0x5f,0x6f,0x66,0x66,0x73,0x65,0x74,0x2c,0x20,0x6f,0x75,0x74,0x70,0x75,0x74,0x29,0x3b,0xa,0x7d,0xa, } + { 0x2f,0x2f,0x20,0x54,0x4f,0x44,0x4f,0x3a,0x20,0x75,0x73,0x65,0x20,0x49,0x4e,0x49,0x54,0x5f,0x53,0x43,0x41,0x4c,0x41,0x52,0x5f,0x56,0x41,0x4c,0x55,0x45,0x2c,0x20,0x4f,0x50,0x45,0x52,0x41,0x54,0x4f,0x52,0x2c,0x20,0x46,0x49,0x4e,0x41,0x4c,0x5f,0x4f,0x50,0x45,0x52,0x41,0x54,0x4f,0x52,0x5f,0x4f,0x4e,0x5f,0x43,0x48,0x41,0x4e,0x4e,0x45,0x4c,0x20,0x6d,0x61,0x63,0x72,0x6f,0x20,0x61,0x62,0x73,0x74,0x72,0x61,0x63,0x74,0x20,0x61,0x6e,0x64,0x20,0x73,0x69,0x6d,0x70,0x6c,0x69,0x66,0x79,0x20,0x63,0x6f,0x64,0x65,0xa,0x2f,0x2f,0x20,0x54,0x4f,0x44,0x4f,0x3a,0x20,0x73,0x75,0x70,0x70,0x6f,0x72,0x74,0x20,0x72,0x65,0x64,0x75,0x63,0x65,0x20,0x64,0x69,0x6d,0x73,0x20,0x69,0x6e,0x63,0x6c,0x75,0x64,0x65,0x20,0x62,0x61,0x74,0x63,0x68,0xa,0x2f,0x2f,0x20,0x54,0x4f,0x44,0x4f,0x3a,0x20,0x73,0x75,0x70,0x70,0x6f,0x72,0x74,0x20,0x6b,0x65,0x65,0x70,0x5f,0x64,0x69,0x6d,0x3d,0x46,0x61,0x6c,0x73,0x65,0xa,0x2f,0x2f,0x20,0x54,0x4f,0x44,0x4f,0x3a,0x20,0x66,0x69,0x78,0x20,0x63,0x68,0x61,0x6e,0x6e,0x65,0x6c,0x20,0x72,0x65,0x64,0x75,0x63,0x65,0x20,0x72,0x65,0x73,0x75,0x6c,0x74,0x20,0x72,0x65,0x2d,0x70,0x61,0x63,0x6b,0x20,0x70,0x72,0x6f,0x62,0x6c,0x65,0x6d,0xa,0x23,0x69,0x66,0x64,0x65,0x66,0x20,0x4d,0x4e,0x4e,0x5f,0x53,0x55,0x50,0x50,0x4f,0x52,0x54,0x5f,0x46,0x50,0x31,0x36,0xa,0x23,0x70,0x72,0x61,0x67,0x6d,0x61,0x20,0x4f,0x50,0x45,0x4e,0x43,0x4c,0x20,0x45,0x58,0x54,0x45,0x4e,0x53,0x49,0x4f,0x4e,0x20,0x63,0x6c,0x5f,0x6b,0x68,0x72,0x5f,0x66,0x70,0x31,0x36,0x20,0x3a,0x20,0x65,0x6e,0x61,0x62,0x6c,0x65,0xa,0x23,0x65,0x6e,0x64,0x69,0x66,0xa,0xa,0x23,0x64,0x65,0x66,0x69,0x6e,0x65,0x20,0x47,0x4c,0x4f,0x42,0x41,0x4c,0x5f,0x53,0x49,0x5a,0x45,0x5f,0x32,0x5f,0x44,0x49,0x4d,0x53,0x20,0x5c,0xa,0x5f,0x5f,0x70,0x72,0x69,0x76,0x61,0x74,0x65,0x20,0x63,0x6f,0x6e,0x73,0x74,0x20,0x69,0x6e,0x74,0x20,0x67,0x6c,0x6f,0x62,0x61,0x6c,0x5f,0x73,0x69,0x7a,0x65,0x5f,0x64,0x69,0x6d,0x30,0x2c,0x20,0x5f,0x5f,0x70,0x72,0x69,0x76,0x61,0x74,0x65,0x20,0x63,0x6f,0x6e,0x73,0x74,0x20,0x69,0x6e,0x74,0x20,0x67,0x6c,0x6f,0x62,0x61,0x6c,0x5f,0x73,0x69,0x7a,0x65,0x5f,0x64,0x69,0x6d,0x31,0x2c,0xa,0xa,0x5f,0x5f,0x6b,0x65,0x72,0x6e,0x65,0x6c,0x20,0x76,0x6f,0x69,0x64,0x20,0x72,0x65,0x64,0x75,0x63,0x74,0x5f,0x62,0x75,0x66,0x28,0x47,0x4c,0x4f,0x42,0x41,0x4c,0x5f,0x53,0x49,0x5a,0x45,0x5f,0x32,0x5f,0x44,0x49,0x4d,0x53,0xa,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x5f,0x5f,0x67,0x6c,0x6f,0x62,0x61,0x6c,0x20,0x63,0x6f,0x6e,0x73,0x74,0x20,0x46,0x4c,0x4f,0x41,0x54,0x2a,0x20,0x69,0x6e,0x70,0x75,0x74,0x2c,0xa,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x5f,0x5f,0x67,0x6c,0x6f,0x62,0x61,0x6c,0x20,0x46,0x4c,0x4f,0x41,0x54,0x2a,0x20,0x6f,0x75,0x74,0x70,0x75,0x74,0x2c,0xa,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x5f,0x5f,0x70,0x72,0x69,0x76,0x61,0x74,0x65,0x20,0x63,0x6f,0x6e,0x73,0x74,0x20,0x69,0x6e,0x74,0x20,0x62,0x61,0x74,0x63,0x68,0x2c,0xa,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x5f,0x5f,0x70,0x72,0x69,0x76,0x61,0x74,0x65,0x20,0x63,0x6f,0x6e,0x73,0x74,0x20,0x69,0x6e,0x74,0x20,0x68,0x65,0x69,0x67,0x68,0x74,0x2c,0xa,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x5f,0x5f,0x70,0x72,0x69,0x76,0x61,0x74,0x65,0x20,0x63,0x6f,0x6e,0x73,0x74,0x20,0x69,0x6e,0x74,0x20,0x77,0x69,0x64,0x74,0x68,0x2c,0xa,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x5f,0x5f,0x70,0x72,0x69,0x76,0x61,0x74,0x65,0x20,0x63,0x6f,0x6e,0x73,0x74,0x20,0x69,0x6e,0x74,0x20,0x63,0x68,0x61,0x6e,0x6e,0x65,0x6c,0xa,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x29,0x20,0x7b,0xa,0x20,0x20,0x20,0x20,0x63,0x6f,0x6e,0x73,0x74,0x20,0x69,0x6e,0x74,0x20,0x62,0x61,0x74,0x63,0x68,0x5f,0x69,0x64,0x78,0x20,0x3d,0x20,0x67,0x65,0x74,0x5f,0x67,0x6c,0x6f,0x62,0x61,0x6c,0x5f,0x69,0x64,0x28,0x30,0x29,0x3b,0xa,0x20,0x20,0x20,0x20,0x63,0x6f,0x6e,0x73,0x74,0x20,0x69,0x6e,0x74,0x20,0x77,0x69,0x64,0x74,0x68,0x5f,0x69,0x64,0x78,0x20,0x3d,0x20,0x67,0x65,0x74,0x5f,0x67,0x6c,0x6f,0x62,0x61,0x6c,0x5f,0x69,0x64,0x28,0x31,0x29,0x3b,0xa,0xa,0x20,0x20,0x20,0x20,0x63,0x6f,0x6e,0x73,0x74,0x20,0x69,0x6e,0x74,0x20,0x69,0x6e,0x70,0x5f,0x6f,0x66,0x66,0x73,0x65,0x74,0x20,0x3d,0x20,0x28,0x28,0x62,0x61,0x74,0x63,0x68,0x5f,0x69,0x64,0x78,0x20,0x2a,0x20,0x68,0x65,0x69,0x67,0x68,0x74,0x20,0x2b,0x20,0x30,0x29,0x20,0x2a,0x20,0x77,0x69,0x64,0x74,0x68,0x20,0x2b,0x20,0x77,0x69,0x64,0x74,0x68,0x5f,0x69,0x64,0x78,0x29,0x2a,0x34,0x3b,0xa,0x20,0x20,0x20,0x20,0x46,0x4c,0x4f,0x41,0x54,0x34,0x20,0x6f,0x75,0x74,0x20,0x3d,0x20,0x76,0x6c,0x6f,0x61,0x64,0x34,0x28,0x30,0x2c,0x20,0x69,0x6e,0x70,0x75,0x74,0x20,0x2b,0x20,0x69,0x6e,0x70,0x5f,0x6f,0x66,0x66,0x73,0x65,0x74,0x29,0x3b,0xa,0x20,0x20,0x20,0x20,0x66,0x6f,0x72,0x20,0x28,0x69,0x6e,0x74,0x20,0x68,0x20,0x3d,0x20,0x31,0x3b,0x20,0x68,0x20,0x3c,0x20,0x68,0x65,0x69,0x67,0x68,0x74,0x3b,0x20,0x68,0x2b,0x2b,0x29,0x20,0x7b,0xa,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x46,0x4c,0x4f,0x41,0x54,0x34,0x20,0x69,0x6e,0x20,0x3d,0x20,0x76,0x6c,0x6f,0x61,0x64,0x34,0x28,0x30,0x2c,0x20,0x69,0x6e,0x70,0x75,0x74,0x20,0x2b,0x20,0x69,0x6e,0x70,0x5f,0x6f,0x66,0x66,0x73,0x65,0x74,0x20,0x2b,0x20,0x68,0x2a,0x77,0x69,0x64,0x74,0x68,0x2a,0x34,0x29,0x3b,0xa,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x6f,0x75,0x74,0x20,0x3d,0x20,0x4f,0x50,0x45,0x52,0x41,0x54,0x45,0x28,0x6f,0x75,0x74,0x2c,0x20,0x69,0x6e,0x29,0x3b,0xa,0x20,0x20,0x20,0x20,0x7d,0xa,0x20,0x20,0x20,0x20,0x46,0x4c,0x4f,0x41,0x54,0x2a,0x20,0x6f,0x75,0x74,0x5f,0x70,0x74,0x72,0x20,0x3d,0x20,0x28,0x46,0x4c,0x4f,0x41,0x54,0x2a,0x29,0x26,0x6f,0x75,0x74,0x3b,0xa,0x20,0x20,0x20,0x20,0x66,0x6f,0x72,0x28,0x69,0x6e,0x74,0x20,0x63,0x20,0x3d,0x20,0x31,0x3b,0x20,0x63,0x20,0x3c,0x20,0x63,0x68,0x61,0x6e,0x6e,0x65,0x6c,0x3b,0x20,0x2b,0x2b,0x63,0x29,0x7b,0xa,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x6f,0x75,0x74,0x2e,0x78,0x20,0x3d,0x20,0x4f,0x50,0x45,0x52,0x41,0x54,0x45,0x28,0x6f,0x75,0x74,0x2e,0x78,0x2c,0x20,0x6f,0x75,0x74,0x5f,0x70,0x74,0x72,0x5b,0x63,0x5d,0x29,0x3b,0xa,0x20,0x20,0x20,0x20,0x7d,0xa,0x20,0x20,0x20,0x20,0xa,0x20,0x20,0x20,0x20,0x23,0x69,0x66,0x64,0x65,0x66,0x20,0x47,0x45,0x54,0x5f,0x41,0x56,0x47,0xa,0x20,0x20,0x20,0x20,0x6f,0x75,0x74,0x2e,0x78,0x20,0x3d,0x20,0x6f,0x75,0x74,0x2e,0x78,0x20,0x2f,0x20,0x28,0x68,0x65,0x69,0x67,0x68,0x74,0x20,0x2a,0x20,0x63,0x68,0x61,0x6e,0x6e,0x65,0x6c,0x29,0x3b,0xa,0x20,0x20,0x20,0x20,0x23,0x65,0x6e,0x64,0x69,0x66,0xa,0x20,0x20,0x20,0x20,0x63,0x6f,0x6e,0x73,0x74,0x20,0x69,0x6e,0x74,0x20,0x6f,0x75,0x74,0x5f,0x6f,0x66,0x66,0x73,0x65,0x74,0x20,0x3d,0x20,0x62,0x61,0x74,0x63,0x68,0x5f,0x69,0x64,0x78,0x20,0x2a,0x20,0x77,0x69,0x64,0x74,0x68,0x20,0x2b,0x20,0x77,0x69,0x64,0x74,0x68,0x5f,0x69,0x64,0x78,0x3b,0xa,0x20,0x20,0x20,0x20,0x76,0x73,0x74,0x6f,0x72,0x65,0x34,0x28,0x28,0x46,0x4c,0x4f,0x41,0x54,0x34,0x29,0x28,0x6f,0x75,0x74,0x2e,0x78,0x2c,0x20,0x30,0x2e,0x30,0x2c,0x20,0x30,0x2e,0x30,0x2c,0x20,0x30,0x2e,0x30,0x29,0x2c,0x20,0x6f,0x75,0x74,0x5f,0x6f,0x66,0x66,0x73,0x65,0x74,0x2c,0x20,0x6f,0x75,0x74,0x70,0x75,0x74,0x29,0x3b,0xa,0x7d,0xa, } }, #endif { diff --git a/source/backend/opencl/execution/cl/reduction.cl b/source/backend/opencl/execution/cl/reduction.cl index d40cb190..ff718612 100644 --- a/source/backend/opencl/execution/cl/reduction.cl +++ b/source/backend/opencl/execution/cl/reduction.cl @@ -19,34 +19,44 @@ __kernel void reduct_general_mean(GLOBAL_SIZE_2_DIMS __write_only image2d_t output, __private const int batch, __private const int height, - __private const int width + __private const int width, + __private const int channel ) { const int batch_idx = get_global_id(0); const int width_idx = get_global_id(1); - FLOAT sum = 0; + FLOAT4 sum = 0; for (int h = 0; h < height; h++) { FLOAT4 in = RI_F(input, SAMPLER, (int2)(width_idx, batch_idx*height+h)); - sum = sum + in.x; + sum = sum + in; } - WI_F(output, (int2)(width_idx, batch_idx), (FLOAT4)(sum/height, 0.0, 0.0, 0.0)); + FLOAT* sum_ptr = (FLOAT*)∑ + for(int i = 1; i < channel; ++i){ + sum.x += sum_ptr[i]; + } + WI_F(output, (int2)(width_idx, batch_idx), (FLOAT4)(sum.x/(height*channel), 0.0, 0.0, 0.0)); } __kernel void reduct_general_sum(GLOBAL_SIZE_2_DIMS __read_only image2d_t input, __write_only image2d_t output, __private const int batch, __private const int height, - __private const int width + __private const int width, + __private const int channel ) { const int batch_idx = get_global_id(0); const int width_idx = get_global_id(1); - FLOAT sum = 0; + FLOAT4 sum = 0; for (int h = 0; h < height; h++) { FLOAT4 in = RI_F(input, SAMPLER, (int2)(width_idx, batch_idx*height+h)); - sum = sum + in.x; + sum = sum + in; + } + FLOAT* sum_ptr = (FLOAT*)∑ + for(int i = 1; i < channel; ++i){ + sum.x += sum_ptr[i]; } - WI_F(output, (int2)(width_idx, batch_idx), (FLOAT4)(sum, 0.0, 0.0, 0.0)); + WI_F(output, (int2)(width_idx, batch_idx), (FLOAT4)(sum.x, 0.0, 0.0, 0.0)); } __kernel void reduct_general_max(GLOBAL_SIZE_2_DIMS @@ -54,17 +64,22 @@ __kernel void reduct_general_max(GLOBAL_SIZE_2_DIMS __write_only image2d_t output, __private const int batch, __private const int height, - __private const int width + __private const int width, + __private const int channel ) { const int batch_idx = get_global_id(0); const int width_idx = get_global_id(1); - FLOAT sum = -MAXFLOAT; + FLOAT4 sum = (FLOAT4)-MAXFLOAT; for (int h = 0; h < height; h++) { FLOAT4 in = RI_F(input, SAMPLER, (int2)(width_idx, batch_idx*height+h)); - sum = max(sum, in.x); + sum = max(sum, in); } - WI_F(output, (int2)(width_idx, batch_idx), (FLOAT4)(sum, 0.0, 0.0, 0.0)); + FLOAT* sum_ptr = (FLOAT*)∑ + for(int i = 1; i < channel; ++i){ + sum.x = max(sum.x, sum_ptr[i]); + } + WI_F(output, (int2)(width_idx, batch_idx), (FLOAT4)(sum.x, 0.0, 0.0, 0.0)); } __kernel void reduct_general_min(GLOBAL_SIZE_2_DIMS @@ -72,17 +87,22 @@ __kernel void reduct_general_min(GLOBAL_SIZE_2_DIMS __write_only image2d_t output, __private const int batch, __private const int height, - __private const int width + __private const int width, + __private const int channel ) { const int batch_idx = get_global_id(0); const int width_idx = get_global_id(1); - FLOAT sum = MAXFLOAT; + FLOAT4 sum = (FLOAT4)MAXFLOAT; for (int h = 0; h < height; h++) { FLOAT4 in = RI_F(input, SAMPLER, (int2)(width_idx, batch_idx*height+h)); - sum = min(sum, in.x); + sum = min(sum, in); } - WI_F(output, (int2)(width_idx, batch_idx), (FLOAT4)(sum, 0.0, 0.0, 0.0)); + FLOAT* sum_ptr = (FLOAT*)∑ + for(int i = 1; i < channel; ++i){ + sum.x = min(sum.x, sum_ptr[i]); + } + WI_F(output, (int2)(width_idx, batch_idx), (FLOAT4)(sum.x, 0.0, 0.0, 0.0)); } __kernel void reduct_general_mul(GLOBAL_SIZE_2_DIMS @@ -90,17 +110,22 @@ __kernel void reduct_general_mul(GLOBAL_SIZE_2_DIMS __write_only image2d_t output, __private const int batch, __private const int height, - __private const int width + __private const int width, + __private const int channel ) { const int batch_idx = get_global_id(0); const int width_idx = get_global_id(1); - FLOAT sum = 1.0; + FLOAT4 sum = (FLOAT4)1.0; for (int h = 0; h < height; h++) { FLOAT4 in = RI_F(input, SAMPLER, (int2)(width_idx, batch_idx*height+h)); - sum = sum * in.x; + sum = sum * in; } - WI_F(output, (int2)(width_idx, batch_idx), (FLOAT4)(sum, 0.0, 0.0, 0.0)); + FLOAT* sum_ptr = (FLOAT*)∑ + for(int i = 1; i < channel; ++i){ + sum.x *= sum_ptr[i]; + } + WI_F(output, (int2)(width_idx, batch_idx), (FLOAT4)(sum.x, 0.0, 0.0, 0.0)); } __kernel void reduct_general_mean_local(GLOBAL_SIZE_2_DIMS @@ -108,21 +133,27 @@ __kernel void reduct_general_mean_local(GLOBAL_SIZE_2_DIMS __write_only image2d_t output, __private const int batch, __private const int height, - __private const int width + __private const int width, + __private const int channel ) { const int batch_idx = get_global_id(1); const int width_idx = get_global_id(2); const int idx = get_local_id(0); FLOAT local sum[256]; - sum[idx] = 0.0; + FLOAT4 out = (FLOAT4)0.0; const int reduce_num = get_local_size(0); for (int h = idx; h < height; h+=reduce_num) { FLOAT4 in = RI_F(input, SAMPLER, (int2)(width_idx, batch_idx*height+h)); - sum[idx] = sum[idx] + in.x; + out = out + in; } - + FLOAT* out_ptr = (FLOAT*)&out; + for(int i = 1; i < channel; ++i){ + out.x += out_ptr[i]; + } + sum[idx] = out.x; + barrier(CLK_LOCAL_MEM_FENCE); for(int i = reduce_num/2; i > 0; i /= 2){ if (idx < i) @@ -130,7 +161,8 @@ __kernel void reduct_general_mean_local(GLOBAL_SIZE_2_DIMS barrier(CLK_LOCAL_MEM_FENCE); } if (idx == 0) { - WI_F(output, (int2)(width_idx, batch_idx), (FLOAT4)(sum[0]/height, 0.0, 0.0, 0.0)); + + WI_F(output, (int2)(width_idx, batch_idx), (FLOAT4)(sum[0]/(height*channel), 0.0, 0.0, 0.0)); } } __kernel void reduct_general_sum_local(GLOBAL_SIZE_2_DIMS @@ -138,22 +170,27 @@ __kernel void reduct_general_sum_local(GLOBAL_SIZE_2_DIMS __write_only image2d_t output, __private const int batch, __private const int height, - __private const int width + __private const int width, + __private const int channel ) { const int batch_idx = get_global_id(1); const int width_idx = get_global_id(2); const int idx = get_local_id(0); FLOAT local sum[256]; - sum[idx] = 0.0; - + FLOAT4 out = (FLOAT4)0.0; const int reduce_num = get_local_size(0); for (int h = idx; h < height; h+=reduce_num) { FLOAT4 in = RI_F(input, SAMPLER, (int2)(width_idx, batch_idx*height+h)); - sum[idx] = sum[idx] + in.x; + out = out + in; } - + FLOAT* out_ptr = (FLOAT*)&out; + for(int i = 1; i < channel; ++i){ + out.x += out_ptr[i]; + } + sum[idx] = out.x; + barrier(CLK_LOCAL_MEM_FENCE); for(int i = reduce_num/2; i > 0; i /= 2){ if (idx < i) @@ -170,20 +207,26 @@ __kernel void reduct_general_max_local(GLOBAL_SIZE_2_DIMS __write_only image2d_t output, __private const int batch, __private const int height, - __private const int width + __private const int width, + __private const int channel ) { const int batch_idx = get_global_id(1); const int width_idx = get_global_id(2); const int idx = get_local_id(0); FLOAT local sum[256]; - sum[idx] = -MAXFLOAT; + FLOAT4 out = (FLOAT4)(-MAXFLOAT); const int reduce_num = get_local_size(0); for (int h = idx; h < height; h+=reduce_num) { FLOAT4 in = RI_F(input, SAMPLER, (int2)(width_idx, batch_idx*height+h)); - sum[idx] = max(sum[idx], in.x); + out = max(out, in); + } + FLOAT* out_ptr = (FLOAT*)&out; + for(int i = 1; i < channel; ++i){ + out.x = max(out.x, out_ptr[i]); } + sum[idx] = out.x; barrier(CLK_LOCAL_MEM_FENCE); for(int i = reduce_num/2; i > 0; i /= 2){ @@ -202,22 +245,28 @@ __kernel void reduct_general_min_local(GLOBAL_SIZE_2_DIMS __write_only image2d_t output, __private const int batch, __private const int height, - __private const int width + __private const int width, + __private const int channel ) { const int batch_idx = get_global_id(1); const int width_idx = get_global_id(2); const int idx = get_local_id(0); FLOAT local sum[256]; - sum[idx] = MAXFLOAT; + FLOAT4 out = (FLOAT4)(MAXFLOAT); const int reduce_num = get_local_size(0); for (int h = idx; h < height; h+=reduce_num) { FLOAT4 in = RI_F(input, SAMPLER, (int2)(width_idx, batch_idx*height+h)); - sum[idx] = min(sum[idx], in.x); + out = min(out, in); } - + FLOAT* out_ptr = (FLOAT*)&out; + for(int i = 1; i < channel; ++i){ + out.x = min(out.x, out_ptr[i]); + } + sum[idx] = out.x; + barrier(CLK_LOCAL_MEM_FENCE); for(int i = reduce_num/2; i > 0; i /= 2){ if (idx < i) @@ -234,21 +283,27 @@ __kernel void reduct_general_mul_local(GLOBAL_SIZE_2_DIMS __write_only image2d_t output, __private const int batch, __private const int height, - __private const int width + __private const int width, + __private const int channel ) { const int batch_idx = get_global_id(1); const int width_idx = get_global_id(2); const int idx = get_local_id(0); FLOAT local sum[256]; - sum[idx] = 1.0; + FLOAT4 out = (FLOAT4)1.0; const int reduce_num = get_local_size(0); for (int h = idx; h < height; h+=reduce_num) { FLOAT4 in = RI_F(input, SAMPLER, (int2)(width_idx, batch_idx*height+h)); - sum[idx] = sum[idx] * in.x; + out = out * in; } + FLOAT* out_ptr = (FLOAT*)&out; + for(int i = 1; i < channel; ++i){ + out.x *= out_ptr[i]; + } + sum[idx] = out.x; barrier(CLK_LOCAL_MEM_FENCE); for(int i = reduce_num/2; i > 0; i /= 2){ diff --git a/source/backend/opencl/execution/cl/reduction_buf.cl b/source/backend/opencl/execution/cl/reduction_buf.cl index a559578d..ce3f9650 100644 --- a/source/backend/opencl/execution/cl/reduction_buf.cl +++ b/source/backend/opencl/execution/cl/reduction_buf.cl @@ -14,21 +14,26 @@ __kernel void reduct_buf(GLOBAL_SIZE_2_DIMS __global FLOAT* output, __private const int batch, __private const int height, - __private const int width + __private const int width, + __private const int channel ) { const int batch_idx = get_global_id(0); const int width_idx = get_global_id(1); const int inp_offset = ((batch_idx * height + 0) * width + width_idx)*4; - FLOAT num = input[inp_offset]; + FLOAT4 out = vload4(0, input + inp_offset); for (int h = 1; h < height; h++) { - FLOAT in = input[inp_offset + h*width*4]; - num = OPERATE; + FLOAT4 in = vload4(0, input + inp_offset + h*width*4); + out = OPERATE(out, in); + } + FLOAT* out_ptr = (FLOAT*)&out; + for(int c = 1; c < channel; ++c){ + out.x = OPERATE(out.x, out_ptr[c]); } #ifdef GET_AVG - num = num / height; + out.x = out.x / (height * channel); #endif const int out_offset = batch_idx * width + width_idx; - vstore4((FLOAT4)(num, 0.0, 0.0, 0.0), out_offset, output); + vstore4((FLOAT4)(out.x, 0.0, 0.0, 0.0), out_offset, output); } diff --git a/source/backend/opencl/execution/image/CommonExecution.cpp b/source/backend/opencl/execution/image/CommonExecution.cpp index a0faf656..a32fd4da 100644 --- a/source/backend/opencl/execution/image/CommonExecution.cpp +++ b/source/backend/opencl/execution/image/CommonExecution.cpp @@ -10,7 +10,9 @@ namespace MNN { namespace OpenCL { -CommonExecution::CommonExecution(Backend *backend) : Execution(backend) { +CommonExecution::CommonExecution(Backend *backend, const MNN::Op *Op) + : Execution(backend), mOp(Op) { + mOpType = Op->type(); } ErrorCode CommonExecution::onExecute(const std::vector &inputs, const std::vector &outputs) { auto runtime = ((OpenCLBackend *)backend())->getOpenCLRuntime(); diff --git a/source/backend/opencl/execution/image/CommonExecution.hpp b/source/backend/opencl/execution/image/CommonExecution.hpp index ad2a75cc..c0d67025 100644 --- a/source/backend/opencl/execution/image/CommonExecution.hpp +++ b/source/backend/opencl/execution/image/CommonExecution.hpp @@ -15,7 +15,7 @@ namespace OpenCL { class CommonExecution : public Execution { public: - CommonExecution(Backend *backend); + CommonExecution(Backend *backend, const MNN::Op *Op); virtual ~CommonExecution() = default; virtual ErrorCode onExecute(const std::vector &inputs, const std::vector &outputs) override; diff --git a/source/backend/opencl/execution/image/Conv2DBackPropFilter.cpp b/source/backend/opencl/execution/image/Conv2DBackPropFilter.cpp index 43f5db64..e15b3a42 100644 --- a/source/backend/opencl/execution/image/Conv2DBackPropFilter.cpp +++ b/source/backend/opencl/execution/image/Conv2DBackPropFilter.cpp @@ -15,7 +15,7 @@ namespace MNN { namespace OpenCL { -Conv2DBackPropFilter::Conv2DBackPropFilter(const MNN::Op *op, Backend *backend) : CommonExecution(backend) { +Conv2DBackPropFilter::Conv2DBackPropFilter(const MNN::Op *op, Backend *backend) : CommonExecution(backend, op) { auto common = op->main_as_Convolution2D()->common(); mStrides = {common->strideY(), common->strideX()}; mDilations = {common->dilateY(), common->dilateX()}; @@ -25,8 +25,6 @@ Conv2DBackPropFilter::Conv2DBackPropFilter(const MNN::Op *op, Backend *backend) if (common->padMode() == PadMode_VALID) { mPaddings[0] = mPaddings[1] = 0; } - mOp = op; - mOpType = op->type(); } Conv2DBackPropFilter::~Conv2DBackPropFilter() { diff --git a/source/backend/opencl/execution/image/EltwiseExecution.cpp b/source/backend/opencl/execution/image/EltwiseExecution.cpp index 3f6ff88b..7e704ec3 100644 --- a/source/backend/opencl/execution/image/EltwiseExecution.cpp +++ b/source/backend/opencl/execution/image/EltwiseExecution.cpp @@ -28,10 +28,8 @@ static string swapComputeIn0In1(const string& computeOrigin) { } EltwiseExecution::EltwiseExecution(const std::vector &inputs, const std::string &compute, const MNN::Op *op, Backend *backend) - : CommonExecution(backend), mCompute(compute) { + : CommonExecution(backend, op), mCompute(compute) { mBuildOptions.emplace("-DOPERATOR=" + compute); - mOp = op; - mOpType = op->type(); } uint32_t EltwiseExecution::realSize(const Tensor* tensor) { diff --git a/source/backend/opencl/execution/image/LoopExecution.cpp b/source/backend/opencl/execution/image/LoopExecution.cpp new file mode 100644 index 00000000..aa67870b --- /dev/null +++ b/source/backend/opencl/execution/image/LoopExecution.cpp @@ -0,0 +1,370 @@ +// +// LoopExecution.cpp +// MNN +// +// Created by MNN on 2019/02/28. +// Copyright © 2018, Alibaba Group Holding Limited +// + + +#include "backend/opencl/execution/image/LoopExecution.hpp" +#include "core/Macro.h" +#include "core/TensorUtils.hpp" + +namespace MNN { +namespace OpenCL { + +static void _TileTensor(Tensor *input, cl::Buffer *output, cl::Kernel& kernel, cl::NDRange &globalWorkSize, + cl::NDRange &localWorkSize, const int Width, const int Height, const int Channel, + const int Batch, OpenCLRuntime *runTime, const std::set &buildOptions) { + kernel = runTime->buildKernel("loop", "tile", buildOptions); + uint32_t mMaxWorkGroupSize = static_cast(runTime->getMaxWorkGroupSize(kernel)); + std::vector mGlobalWorkSize = {(uint32_t)(Width * Height), (uint32_t)(UP_DIV(Channel, 4)), (uint32_t)(Batch)}; + + uint32_t index = 0; + kernel.setArg(index++, mGlobalWorkSize[0]); + kernel.setArg(index++, mGlobalWorkSize[1]); + kernel.setArg(index++, mGlobalWorkSize[2]); + kernel.setArg(index++, openCLImage(input)); + kernel.setArg(index++, *output); + kernel.setArg(index++, Width); + kernel.setArg(index++, Height); + kernel.setArg(index++, Channel); + + std::vector mLocalWorkSize = localWS3DDefault(mGlobalWorkSize, mMaxWorkGroupSize, runTime, "tile", kernel).first; + + globalWorkSize = {mGlobalWorkSize[0], mGlobalWorkSize[1], mGlobalWorkSize[2]}; + localWorkSize = {mLocalWorkSize[0], mLocalWorkSize[1], mLocalWorkSize[2]}; +} + +static void _PackTensor(cl::Buffer *input, Tensor *output, cl::Kernel& kernel, cl::NDRange &globalWorkSize, + cl::NDRange &localWorkSize, const int Width, const int Height, const int Channel, + const int Batch, OpenCLRuntime *runTime, const std::set &buildOptions) { + kernel = runTime->buildKernel("loop", "pack", buildOptions); + uint32_t mMaxWorkGroupSize = static_cast(runTime->getMaxWorkGroupSize(kernel)); + std::vector mGlobalWorkSize = {(uint32_t)(Width * Height), (uint32_t)(UP_DIV(Channel, 4)), (uint32_t)(Batch)}; + + uint32_t index = 0; + kernel.setArg(index++, mGlobalWorkSize[0]); + kernel.setArg(index++, mGlobalWorkSize[1]); + kernel.setArg(index++, mGlobalWorkSize[2]); + kernel.setArg(index++, *input); + kernel.setArg(index++, openCLImage(output)); + kernel.setArg(index++, Width); + kernel.setArg(index++, Height); + kernel.setArg(index++, Channel); + + std::vector mLocalWorkSize = localWS3DDefault(mGlobalWorkSize, mMaxWorkGroupSize, runTime, "pack", kernel).first; + + globalWorkSize = {mGlobalWorkSize[0], mGlobalWorkSize[1], mGlobalWorkSize[2]}; + localWorkSize = {mLocalWorkSize[0], mLocalWorkSize[1], mLocalWorkSize[2]}; +} + +static void _setTensorStack(std::vector &result, const std::vector &inputs, + const std::vector &outputs, const LoopParam *loop) { + if (loop->inputIndexes() != nullptr) { + for (int i = 0; i < loop->inputIndexes()->size(); ++i) { + result[loop->inputIndexes()->data()[i]] = inputs[i]; + } + } + for (int i = 0; i < loop->outputIndexes()->size(); ++i) { + result[loop->outputIndexes()->data()[i]] = outputs[i]; + } +} + + + LoopGatherExecution::LoopGatherExecution(const LoopParam *loop, const MNN::Op *op, Backend *bn) + : CommonExecution(bn, op) { + mLoop = loop; + mTensors.resize(mLoop->tensorNumber()); + auto cmd = loop->commands()->GetAs(0); + mOpType = op->type(); + } + ErrorCode LoopGatherExecution::onResize(const std::vector &inputs, const std::vector &outputs) { + auto cmd = mLoop->commands()->GetAs(0); + OpenCLBackend *mOpenCLBackend = (OpenCLBackend *)backend(); + auto runTime = mOpenCLBackend->getOpenCLRuntime(); + auto bufferPool = mOpenCLBackend->getBufferPool(); + auto bufferUnitSize = runTime->isSupportedFP16() ? sizeof(half_float::half) : sizeof(float); + _setTensorStack(mTensors, inputs, outputs, mLoop); + mUnits.clear(); + mOffsetBuffers.clear(); + mTmpBuffers.resize(2); + int x = cmd->size()->data()[0]; + int y = cmd->size()->data()[1]; + int z = cmd->size()->data()[2]; + int n = mLoop->loopNumber(); + + auto srcStride = cmd->view()->GetAs(1)->stride()->data(); + auto dstStride = cmd->view()->GetAs(0)->stride()->data(); + for (int i = 0; i < 3; ++i) { + mStride_src[i] = srcStride[i]; + mStride_dst[i] = dstStride[i]; + } + + mStride_src[3] = cmd->view()->GetAs(1)->offset(); + mStride_dst[3] = cmd->view()->GetAs(0)->offset(); + ::memcpy(mStep, cmd->steps()->data(), cmd->steps()->size() * sizeof(int)); + ::memcpy(mIter, cmd->iterIndexes()->data(), cmd->iterIndexes()->size() * sizeof(int)); + + // tile input + { + auto input = mTensors[cmd->indexes()->data()[1]]; + std::vector Shape = tensorShapeFormat(input); + const int Channel = Shape.at(3); + const int Width = Shape.at(2); + const int Height = Shape.at(1); + const int Batch = Shape.at(0); + mTmpBuffers[1] = bufferPool->alloc(input->elementSize() * bufferUnitSize); + + Unit unit; + _TileTensor(mTensors[cmd->indexes()->data()[1]], mTmpBuffers[1], unit.kernel, unit.globalWorkSize, unit.localWorkSize, Width, Height,Channel, Batch, runTime, mBuildOptions); + mUnits.emplace_back(unit); + } + + for(int i = 0; i < cmd->iterIndexes()->size(); ++i){ + if (mIter[i] >= 0) { + auto input = mTensors[cmd->iterIndexes()->data()[i]]; + std::vector Shape = tensorShapeFormat(input); + const int Channel = Shape.at(3); + const int Width = Shape.at(2); + const int Height = Shape.at(1); + const int Batch = Shape.at(0); + mOffsetBuffers.emplace_back(bufferPool->alloc(input->elementSize() * bufferUnitSize)); + + Unit unit; + _TileTensor(input, mOffsetBuffers.back(), unit.kernel, unit.globalWorkSize, unit.localWorkSize, Width, Height, Channel, Batch, runTime, mBuildOptions); + mUnits.emplace_back(unit); + } + } + + // gather + { + mTmpBuffers[0] = bufferPool->alloc(n * z * y * x * bufferUnitSize); + int offset_index = 0; + Unit unit; + std::string KernelName = "batch_gather"; + unit.kernel = runTime->buildKernel("loop", KernelName, mBuildOptions); + uint32_t mMaxWorkGroupSize = static_cast(runTime->getMaxWorkGroupSize(unit.kernel)); + std::vector mGlobalWorkSize = {(uint32_t)(x * y), (uint32_t)(z), (uint32_t)(n)}; + + uint32_t index = 0; + unit.kernel.setArg(index++, mGlobalWorkSize[0]); + unit.kernel.setArg(index++, mGlobalWorkSize[1]); + unit.kernel.setArg(index++, mGlobalWorkSize[2]); + unit.kernel.setArg(index++, *mTmpBuffers[0]); + unit.kernel.setArg(index++, *mTmpBuffers[1]); + for (int i = 0; i < cmd->iterIndexes()->size(); ++i) { + if (mIter[i] >= 0) { + unit.kernel.setArg(index++, *mOffsetBuffers[offset_index++]); + } else { + unit.kernel.setArg(index++, *mTmpBuffers[0]); + } + } + unit.kernel.setArg(index++, x); + unit.kernel.setArg(index++, sizeof(mStride_src), mStride_src); + unit.kernel.setArg(index++, sizeof(mStride_dst), mStride_dst); + unit.kernel.setArg(index++, sizeof(mStep), mStep); + unit.kernel.setArg(index++, sizeof(mIter), mIter); + + std::vector mLocalWorkSize = localWS3DDefault(mGlobalWorkSize, mMaxWorkGroupSize, runTime, KernelName, unit.kernel).first; + + unit.globalWorkSize = {mGlobalWorkSize[0], mGlobalWorkSize[1], mGlobalWorkSize[2]}; + unit.localWorkSize = {mLocalWorkSize[0], mLocalWorkSize[1], mLocalWorkSize[2]}; + mUnits.emplace_back(unit); + } + + //pack output + { + auto output = mTensors[cmd->indexes()->data()[0]]; + std::vector Shape = tensorShapeFormat(output); + const int Channel = Shape.at(3); + const int Width = Shape.at(2); + const int Height = Shape.at(1); + const int Batch = Shape.at(0); + Unit unit; + _PackTensor(mTmpBuffers[0], mTensors[cmd->indexes()->data()[0]], unit.kernel, unit.globalWorkSize, unit.localWorkSize, Width, Height, Channel, Batch, runTime, mBuildOptions); + mUnits.emplace_back(unit); + } + + for (int i = 0; i < mTmpBuffers.size(); ++i) { + bufferPool->recycle(mTmpBuffers[i]); + } + for (int i = 0; i < mOffsetBuffers.size(); ++i) { + bufferPool->recycle(mOffsetBuffers[i]); + } + + return NO_ERROR; + } + + +LoopBatchMatMulExecution::LoopBatchMatMulExecution(const LoopParam *loop, const MNN::Op *op, Backend *bn) + : CommonExecution(bn, op) { + mLoop = loop; + mTensors.resize(mLoop->tensorNumber()); + auto cmd = loop->commands()->GetAs(0); + mHasBias = cmd->indexes()->size() > 3; + mTransposeA = cmd->op()->main_as_MatMul()->transposeA(); + mTransposeB = cmd->op()->main_as_MatMul()->transposeB(); +} +ErrorCode LoopBatchMatMulExecution::onResize(const std::vector &inputs, const std::vector &outputs) { + auto cmd = mLoop->commands()->GetAs(0); + OpenCLBackend *mOpenCLBackend = (OpenCLBackend *)backend(); + auto runTime = mOpenCLBackend->getOpenCLRuntime(); + auto bufferPool = mOpenCLBackend->getBufferPool(); + auto bufferUnitSize = runTime->isSupportedFP16() ? sizeof(half_float::half) : sizeof(float); + _setTensorStack(mTensors, inputs, outputs, mLoop); + + mOffset[0] = cmd->view()->GetAs(0)->offset(); + mOffset[1] = cmd->view()->GetAs(1)->offset(); + mOffset[2] = cmd->view()->GetAs(2)->offset(); + mUnits.clear(); + mOffsetBuffers.clear(); + mTmpBuffers.resize(3); + if (mHasBias) { + mTmpBuffers.resize(4); + mOffset[3] = cmd->view()->GetAs(3)->offset(); + } + + ::memcpy(mStep, cmd->steps()->data(), cmd->steps()->size() * sizeof(int)); + ::memcpy(mIter, cmd->iterIndexes()->data(), cmd->iterIndexes()->size() * sizeof(int)); + int e = cmd->size()->data()[0]; + int l = cmd->size()->data()[1]; + int h = cmd->size()->data()[2]; + int n = mLoop->loopNumber(); + + // tile input + for (int i = 1; i < cmd->indexes()->size(); ++i) { + auto input = mTensors[cmd->indexes()->data()[i]]; + std::vector Shape = tensorShapeFormat(input); + const int Channel = Shape.at(3); + const int Width = Shape.at(2); + const int Height = Shape.at(1); + const int Batch = Shape.at(0); + mTmpBuffers[i] = bufferPool->alloc(input->elementSize() * bufferUnitSize); + + Unit unit; + _TileTensor(input, mTmpBuffers[i], unit.kernel, unit.globalWorkSize, unit.localWorkSize, Width, Height, Channel, Batch, runTime, mBuildOptions); + mUnits.emplace_back(unit); + } + + for(int i = 0; i < cmd->iterIndexes()->size(); ++i){ + if (mIter[i] >= 0) { + auto input = mTensors[cmd->iterIndexes()->data()[i]]; + std::vector Shape = tensorShapeFormat(input); + const int Channel = Shape.at(3); + const int Width = Shape.at(2); + const int Height = Shape.at(1); + const int Batch = Shape.at(0); + mOffsetBuffers.emplace_back(bufferPool->alloc(input->elementSize() * bufferUnitSize)); + + Unit unit; + _TileTensor(input, mOffsetBuffers.back(), unit.kernel, unit.globalWorkSize, unit.localWorkSize, Width, Height, Channel, Batch, runTime, mBuildOptions); + mUnits.emplace_back(unit); + } + } + + // matmul + { + mTmpBuffers[0] = bufferPool->alloc(n * e * h * bufferUnitSize); + int offset_index = 0; + + Unit unit; + std::string KernelName = "batch_matmul"; + if (mHasBias) { + mBuildOptions.emplace("-DBIAS"); + } + if (mTransposeA) { + mBuildOptions.emplace("-DTRANSPOSE_A"); + } + if (mTransposeB) { + mBuildOptions.emplace("-DTRANSPOSE_B"); + } + unit.kernel = runTime->buildKernel("loop", KernelName, mBuildOptions); + uint32_t mMaxWorkGroupSize = static_cast(runTime->getMaxWorkGroupSize(unit.kernel)); + std::vector mGlobalWorkSize = {(uint32_t)(h), (uint32_t)(e),(uint32_t)(n)}; + + uint32_t index = 0; + unit.kernel.setArg(index++, mGlobalWorkSize[0]); + unit.kernel.setArg(index++, mGlobalWorkSize[1]); + unit.kernel.setArg(index++, mGlobalWorkSize[2]); + unit.kernel.setArg(index++, *mTmpBuffers[0]); + unit.kernel.setArg(index++, *mTmpBuffers[1]); + unit.kernel.setArg(index++, *mTmpBuffers[2]); + if (mHasBias) { + unit.kernel.setArg(index++, *mTmpBuffers[3]); + } + for (int i = 0; i < cmd->iterIndexes()->size(); ++i) { + if (mIter[i] >= 0) { + unit.kernel.setArg(index++, *mOffsetBuffers[offset_index++]); + } else { + unit.kernel.setArg(index++, *mTmpBuffers[0]); + } + } + unit.kernel.setArg(index++, e); + unit.kernel.setArg(index++, l); + unit.kernel.setArg(index++, h); + unit.kernel.setArg(index++, sizeof(mOffset), mOffset); + unit.kernel.setArg(index++, sizeof(mIter), mIter); + unit.kernel.setArg(index++, sizeof(mStep), mStep); + + std::vector mLocalWorkSize = localWS3DDefault(mGlobalWorkSize, mMaxWorkGroupSize, runTime, KernelName, unit.kernel).first; + + unit.globalWorkSize = {mGlobalWorkSize[0], mGlobalWorkSize[1], mGlobalWorkSize[2]}; + unit.localWorkSize = {mLocalWorkSize[0], mLocalWorkSize[1], mLocalWorkSize[2]}; + mUnits.emplace_back(unit); + } + + //pack output + { + auto output = mTensors[cmd->indexes()->data()[0]]; + std::vector Shape = tensorShapeFormat(output); + const int Channel = Shape.at(3); + const int Width = Shape.at(2); + const int Height = Shape.at(1); + const int Batch = Shape.at(0); + Unit unit; + _PackTensor(mTmpBuffers[0], output, unit.kernel, unit.globalWorkSize, unit.localWorkSize, Width, Height, Channel, Batch, runTime, mBuildOptions); + mUnits.emplace_back(unit); + } + + for (int i = 0; i < mTmpBuffers.size(); ++i) { + bufferPool->recycle(mTmpBuffers[i]); + } + for (int i = 0; i < mOffsetBuffers.size(); ++i) { + bufferPool->recycle(mOffsetBuffers[i]); + } + + return NO_ERROR; +} + +class LoopCreator : public OpenCLBackend::Creator { +public: + virtual Execution *onCreate(const std::vector &inputs, const std::vector &outputs, + const MNN::Op *op, Backend *backend) const override { + auto loop = op->main_as_LoopParam(); + if (nullptr == loop || loop->commands() == nullptr) { + return nullptr; + } + if (nullptr != loop->initCommand()) { + return nullptr; + } + // Make Tensor Stack + if (1 == loop->commands()->size()) { + auto cmd = loop->commands()->GetAs(0); + auto subop = cmd->op(); + if (OpType_UnaryOp == subop->type() && nullptr == subop->main() && cmd->fuse() < 0) { + return new LoopGatherExecution(loop, op, backend); + } + if (OpType_MatMul == subop->type() && loop->parallel()) { + return new LoopBatchMatMulExecution(loop, op, backend); + } + } + return nullptr; + } +}; + +OpenCLCreatorRegister __Loop_op(OpType_While, IMAGE); + +} // namespace OpenCL +} // namespace MNN diff --git a/source/backend/opencl/execution/image/LoopExecution.hpp b/source/backend/opencl/execution/image/LoopExecution.hpp new file mode 100644 index 00000000..d383b8b6 --- /dev/null +++ b/source/backend/opencl/execution/image/LoopExecution.hpp @@ -0,0 +1,58 @@ +// +// LoopExecution.hpp +// MNN +// +// Created by MNN on 2023/05/04. +// Copyright © 2018, Alibaba Group Holding Limited +// + + +#ifndef LoopExecution_hpp +#define LoopExecution_hpp + +#include "backend/opencl/execution/image/CommonExecution.hpp" + +namespace MNN { +namespace OpenCL { + +class LoopGatherExecution : public CommonExecution { +public: + LoopGatherExecution(const LoopParam *loop, const MNN::Op *op, Backend *bn); + virtual ~LoopGatherExecution() = default; + virtual ErrorCode onResize(const std::vector &inputs, const std::vector &outputs) override; + +private: + const LoopParam *mLoop; + std::vector mTensors; + std::vector mTmpBuffers; + std::vector mOffsetBuffers; + int mStride_src[4]; + int mStride_dst[4]; + int mStep[2]; + int mIter[2]; + std::set mBuildOptions; +}; + +class LoopBatchMatMulExecution : public CommonExecution { +public: + LoopBatchMatMulExecution(const LoopParam *loop, const MNN::Op *op, Backend *bn); + virtual ~LoopBatchMatMulExecution() = default; + virtual ErrorCode onResize(const std::vector &inputs, const std::vector &outputs) override; + +private: + const LoopParam *mLoop; + std::vector mTensors; + std::vector mTmpBuffers; + std::vector mOffsetBuffers; + int mOffset[4]; + int mStep[4]; + int mIter[4]; + bool mHasBias = false; + bool mTransposeA = false; + bool mTransposeB = false; + std::set mBuildOptions; +}; + +} // namespace OpenCL +} // namespace MNN +#endif /* LoopExecution_hpp */ diff --git a/source/backend/opencl/execution/image/MultiInputDWConvExecution.cpp b/source/backend/opencl/execution/image/MultiInputDWConvExecution.cpp index 2ca4fdb2..cbbfe355 100644 --- a/source/backend/opencl/execution/image/MultiInputDWConvExecution.cpp +++ b/source/backend/opencl/execution/image/MultiInputDWConvExecution.cpp @@ -15,7 +15,7 @@ namespace MNN { namespace OpenCL { -MultiInputDWConvExecution::MultiInputDWConvExecution(const MNN::Op *op, Backend *backend) : CommonExecution(backend) { +MultiInputDWConvExecution::MultiInputDWConvExecution(const MNN::Op *op, Backend *backend) : CommonExecution(backend, op) { auto common = op->main_as_Convolution2D()->common(); mPadMode = common->padMode(); mStrides = {common->strideY(), common->strideX()}; @@ -25,8 +25,6 @@ MultiInputDWConvExecution::MultiInputDWConvExecution(const MNN::Op *op, Backend } isRelu = common->relu(); isRelu6 = common->relu6(); - mOp = op; - mOpType = op->type(); } MultiInputDWConvExecution::~MultiInputDWConvExecution() { diff --git a/source/backend/opencl/execution/image/MultiInputDWDeconvExecution.cpp b/source/backend/opencl/execution/image/MultiInputDWDeconvExecution.cpp index 9ee2d4f6..13e40cd4 100644 --- a/source/backend/opencl/execution/image/MultiInputDWDeconvExecution.cpp +++ b/source/backend/opencl/execution/image/MultiInputDWDeconvExecution.cpp @@ -13,7 +13,7 @@ namespace MNN { namespace OpenCL { -MultiInputDWDeconvExecution::MultiInputDWDeconvExecution(const MNN::Op *op, Backend *backend) : CommonExecution(backend) { +MultiInputDWDeconvExecution::MultiInputDWDeconvExecution(const MNN::Op *op, Backend *backend) : CommonExecution(backend, op) { auto common = op->main_as_Convolution2D()->common(); mStrides = {common->strideY(), common->strideX()}; @@ -30,8 +30,6 @@ MultiInputDWDeconvExecution::MultiInputDWDeconvExecution(const MNN::Op *op, Back isRelu = common->relu(); isRelu6 = common->relu6(); - mOp = op; - mOpType = op->type(); } MultiInputDWDeconvExecution::~MultiInputDWDeconvExecution() { diff --git a/source/backend/opencl/execution/image/RasterExecution.cpp b/source/backend/opencl/execution/image/RasterExecution.cpp index 2cfb07fc..3f604755 100644 --- a/source/backend/opencl/execution/image/RasterExecution.cpp +++ b/source/backend/opencl/execution/image/RasterExecution.cpp @@ -17,10 +17,8 @@ namespace OpenCL { RasterExecution::RasterExecution(const std::vector &inputs, const MNN::Op *op, Backend *backend) - : CommonExecution(backend) { + : CommonExecution(backend, op) { mOpenCLBackend = (OpenCLBackend *)backend; - mOp = op; - mOpType = op->type(); //nothing to do } diff --git a/source/backend/opencl/execution/image/ReductionExecution.cpp b/source/backend/opencl/execution/image/ReductionExecution.cpp index 8e07cde6..ed0bf097 100644 --- a/source/backend/opencl/execution/image/ReductionExecution.cpp +++ b/source/backend/opencl/execution/image/ReductionExecution.cpp @@ -13,7 +13,7 @@ namespace MNN { namespace OpenCL { -ReductionExecution::ReductionExecution(const MNN::Op* op, Backend* backend) : CommonExecution(backend) { +ReductionExecution::ReductionExecution(const MNN::Op* op, Backend* backend) : CommonExecution(backend, op) { #ifdef LOG_VERBOSE MNN_PRINT("start ReductionExecution init !\n"); #endif @@ -44,7 +44,6 @@ ReductionExecution::ReductionExecution(const MNN::Op* op, Backend* backend) : Co MNN_ASSERT(false); break; } - mOp = op; #ifdef LOG_VERBOSE MNN_PRINT("end ReductionExecution init !\n"); #endif @@ -89,7 +88,7 @@ ErrorCode ReductionExecution::onResize(const std::vector &inputs, cons break; } } else { //useLocal - uint32_t global_x; + uint32_t global_x = 8; int size = inputShape[1]; if (size >= 1024) { global_x = 256; @@ -144,6 +143,7 @@ ErrorCode ReductionExecution::onResize(const std::vector &inputs, cons mReduct1DKernel.setArg(idx++, static_cast(inputShape[0])); mReduct1DKernel.setArg(idx++, static_cast(inputShape[1])); mReduct1DKernel.setArg(idx++, static_cast(inputShape[2])); + mReduct1DKernel.setArg(idx++, static_cast(inputShape[3])); return NO_ERROR; } diff --git a/source/backend/opencl/execution/image/ReluExecution.cpp b/source/backend/opencl/execution/image/ReluExecution.cpp index 14ab104d..ab6fe12d 100644 --- a/source/backend/opencl/execution/image/ReluExecution.cpp +++ b/source/backend/opencl/execution/image/ReluExecution.cpp @@ -14,7 +14,7 @@ namespace MNN { namespace OpenCL { ReluExecution::ReluExecution(const std::vector &inputs, const MNN::Op *op, Backend *backend) - : CommonExecution(backend) { + : CommonExecution(backend, op) { auto mOpenCLBackend = static_cast(backend); auto mPreluParamPtr = op->main_as_PRelu(); int preluSize = mPreluParamPtr->slopeCount(); @@ -50,8 +50,6 @@ ReluExecution::ReluExecution(const std::vector &inputs, const MNN::Op mOpenCLBackend->onAcquireBuffer(mPreluParam.get(), Backend::STATIC); copyBufferToImage(mOpenCLBackend->getOpenCLRuntime(), preluBuffer, openCLImage(mPreluParam.get()), UP_DIV(preluSize, 4), 1); - mOp = op; - mOpType = op->type(); } ReluExecution::~ReluExecution() { backend()->onReleaseBuffer(mPreluParam.get(), Backend::STATIC); diff --git a/source/backend/opencl/execution/image/TrainableParamExecution.cpp b/source/backend/opencl/execution/image/TrainableParamExecution.cpp index ae09540c..68eebae5 100644 --- a/source/backend/opencl/execution/image/TrainableParamExecution.cpp +++ b/source/backend/opencl/execution/image/TrainableParamExecution.cpp @@ -13,8 +13,7 @@ namespace MNN { namespace OpenCL { -TrainableParamExecution::TrainableParamExecution(const std::vector &inputs, const MNN::Op *op, Backend *backend) : CommonExecution(backend), mOp(op), mInitialized(false) { - mOp = op; +TrainableParamExecution::TrainableParamExecution(const std::vector &inputs, const MNN::Op *op, Backend *backend) : CommonExecution(backend, op), mInitialized(false) { } TrainableParamExecution::~TrainableParamExecution() { diff --git a/source/common/CommonCompute.hpp b/source/common/CommonCompute.hpp index a0b0ae14..e2ce65e7 100644 --- a/source/common/CommonCompute.hpp +++ b/source/common/CommonCompute.hpp @@ -84,7 +84,41 @@ public: } return true; } + static bool compressFloatWeightToSparse(MNN::OpT* op) { + auto opType = op->type; + auto param = op->main.AsConvolution2D(); + if (param->sparseParameter.get() == nullptr) { + return false; + } + // Encode for sparse float weight + size_t weightSize = param->weight.size(); + if (weightSize > std::numeric_limits().max()) { + MNN_ERROR("The weightSize exceed uint32_t, can't compress the sparse weight\n"); + return false; + } + param->quanParameter.reset(new IDSTQuanT); + size_t validSize = 0; + std::vector indexes; + std::vector newWeights; + + for (size_t i=0; iweight[i] != 0.0f) { + indexes.emplace_back(i); + newWeights.emplace_back(param->weight[i]); + } + } + // If empty, Add Single weight to avoid error, runtime can't extract full sparse convolution + if (indexes.empty()) { + indexes.emplace_back(0); + newWeights.emplace_back(0.0f); + } + param->weight.clear(); + param->quanParameter->alpha = std::move(newWeights); + param->quanParameter->weightSize = (uint32_t)weightSize; + param->quanParameter->index = std::move(indexes); + return true; + } }; } // namespace MNN diff --git a/source/core/Backend.hpp b/source/core/Backend.hpp index eca806d5..c4878bab 100644 --- a/source/core/Backend.hpp +++ b/source/core/Backend.hpp @@ -256,6 +256,10 @@ public: virtual int onGetRuntimeStatus(RuntimeStatus statusEnum) const { return 0; } + // If the info user set can't be match by runtime, return false and set real info + virtual bool onCheckInfo(Backend::Info& info) const { + return true; + } struct OpInfo { bool initCostLong; float exeutionCost; // In ms diff --git a/source/core/ConvolutionCommon.cpp b/source/core/ConvolutionCommon.cpp index 55b88a72..a85c6d4f 100644 --- a/source/core/ConvolutionCommon.cpp +++ b/source/core/ConvolutionCommon.cpp @@ -8,12 +8,13 @@ #include "ConvolutionCommon.hpp" #include +#include "backend/cpu/compute/CommonOptFunction.h" #include "half.hpp" namespace MNN { static inline void *MNNMemoryAllocAlignZeroAlign(size_t size) { return MNNMemoryCallocAlign(size, MNN_MEMORY_ALIGN_DEFAULT); } -static int ReadBlobDim(unsigned char *&myfile, unsigned short *shape, int shapeBufCnt) { +static int ReadBlobDim(unsigned char *&myfile, unsigned int* shape, int shapeBufCnt, bool useInt32) { int uSize = myfile[0]; myfile++; if (uSize > 4) { @@ -24,8 +25,16 @@ static int ReadBlobDim(unsigned char *&myfile, unsigned short *shape, int shapeB if (copyLength > shapeBufCnt) { copyLength = shapeBufCnt; } - ::memcpy(shape, myfile, sizeof(unsigned short) * copyLength); - myfile += copyLength * sizeof(unsigned short); + if (useInt32) { + ::memcpy(shape, myfile, sizeof(unsigned int) * copyLength); + myfile += copyLength * sizeof(unsigned int); + } else { + auto myfileint16 = (uint16_t*)myfile; + for (int i=0; i 64) + unsigned int shape[32] = {0}; + uint32_t shapeDim = (uint32_t)ReadBlobDim(s, shape, 32, shapeInt32); + if (shapeDim == 0 || shapeDim > 32) break; for (uint32_t i = 0; i < shapeDim; i++) dataCnt *= shape[i]; @@ -198,7 +206,8 @@ static int8_t *ReadQuanData_c(unsigned char *&s, uint32_t *len) { if (0 == sampleCnt) { sampleCnt = 256; } - samples = (int8_t *)MNNMemoryAllocAlignZeroAlign(sampleCnt); + result->weightMap.resize(sampleCnt); + auto samples = result->weightMap.data(); if (samples == nullptr) break; StreamSizeRead(samples, 1, sampleCnt, s); @@ -238,8 +247,6 @@ static int8_t *ReadQuanData_c(unsigned char *&s, uint32_t *len) { } } while (0); - if (samples != nullptr) - MNNMemoryFreeAlign(samples); if (idxBuf != nullptr) MNNMemoryFreeAlign(idxBuf); if (idxBytes != nullptr) @@ -249,9 +256,9 @@ static int8_t *ReadQuanData_c(unsigned char *&s, uint32_t *len) { return blob; } -static int8_t *ReadSparseQuanData_c(unsigned char *&myfile, uint32_t *len, const flatbuffers::Vector *alpha) { +static int8_t *ReadSparseQuanData_c(unsigned char *&myfile, size_t* len, const flatbuffers::Vector *alpha, ConvolutionCommon::Int8Common* result, bool useInt32) { // MNN_ERROR("sparse:%d\n", 1); - unsigned short shape[64] = {0}; + unsigned int shape[32]; uint32_t ucMapSize = 0; PSIMPLE_SET setWeight = CreateSimpleSet(256); if (setWeight == nullptr) { @@ -262,8 +269,8 @@ static int8_t *ReadSparseQuanData_c(unsigned char *&myfile, uint32_t *len, const unsigned char iIdxNeedBits; int8_t *blob = nullptr; // 1. weights blob shape(unsigned int32) - int ShapeDim = ReadBlobDim(myfile, shape, 64); - int Size = sizeof(int8_t); + int ShapeDim = ReadBlobDim(myfile, shape, 32, useInt32); + size_t Size = sizeof(int8_t); for (int i = 0; i < ShapeDim; i++) Size *= shape[i]; blob = (int8_t *)MNNMemoryAllocAlignZeroAlign((size_t)Size); @@ -295,11 +302,13 @@ static int8_t *ReadSparseQuanData_c(unsigned char *&myfile, uint32_t *len, const if (0 == ucMapSize) { ucMapSize = 256; } + result->weightMap.resize(ucMapSize); // 6. valueset(signed char * valueset_size) for (int i = 0; i < ucMapSize; i++) { int8_t tmp; StreamSizeRead(&tmp, 1, 1, myfile); InsertSimpleSet(setWeight, tmp); + result->weightMap[i] = tmp; } SimpleRank(setWeight->UniSet, setWeight->CurUniCnt, 1); // map mapWeight; @@ -367,14 +376,61 @@ static int8_t *ReadSparseQuanData_c(unsigned char *&myfile, uint32_t *len, const } std::shared_ptr ConvolutionCommon::load(const IDSTQuan *quan, bool forceFloat, bool forceInt8) { auto result = std::make_shared(); - uint32_t weightLength = 0; + result->quan = quan; + if (quan->index() != nullptr) { + if (forceFloat) { + // Expand sparse to dense + result->weightFloat.reset(quan->weightSize()); + if (nullptr == result->weightFloat.get()) { + return nullptr; + } + ::memset(result->weightFloat.get(), 0, quan->weightSize() * sizeof(float)); + auto index = quan->index()->data(); + auto indexSize = quan->index()->size(); + if (nullptr == quan->alpha() || quan->alpha()->size() != indexSize) { + MNN_ERROR("The model is error, don't has alpha but has index\n"); + return nullptr; + } + auto weightRaw = quan->alpha()->data(); + for (uint32_t i=0; iweightFloat.get()[index[i]] = weightRaw[i]; + } + } // Otherwise needn't treat, just return result with quan info + return result; + } + size_t weightLength = 0; int8_t *buffer = nullptr; auto originBuffer = (unsigned char *)quan->buffer()->data(); if (1 == quan->type()) { - buffer = ReadQuanData_c(originBuffer, &weightLength); + buffer = ReadQuanData_c(originBuffer, &weightLength, result.get(), quan->shapeInt32()); } if (2 == quan->type()) { - buffer = ReadSparseQuanData_c(originBuffer, &weightLength, quan->alpha()); + buffer = ReadSparseQuanData_c(originBuffer, &weightLength, quan->alpha(), result.get(), quan->shapeInt32()); + } + if (result->weightMap.size() > 0 && result->weightMap.size() <= 16) { + // Compute Remap for int4 + result->canUseInt4 = true; + result->weightReverseMap.resize(256); + ::memset(result->weightReverseMap.data(), 0, 256 * sizeof(int8_t)); + for (int i=0; iweightMap.size(); ++i) { + int value = result->weightMap[i]; + value = value + 128; + result->weightReverseMap[value] = i; + } +#ifdef MNN_TEST_REMAPQUANT + // Test reverse + std::vector originBuffer(weightLength); + for (int i=0; iweightReverseMap[(int)buffer[i] + 128]; + } + for (int i=0; iweightMap[buffer[i]]; + } + for (int i=0; itype()) { @@ -406,13 +462,41 @@ std::shared_ptr ConvolutionCommon::load(const IDS } result->weight.set(buffer, weightLength); } - result->quan = quan; result->alpha.reset(quan->alpha()->size()); if (nullptr == result->alpha.get()) { MNN_PRINT("Alloc memory error for extract idst int8\n"); return nullptr; } ::memcpy(result->alpha.get(), quan->alpha()->data(), quan->alpha()->size() * sizeof(float)); + { + int outputCount = 0; + bool oldType4 = (quan->type() == 4 && quan->aMin() == 0 && std::abs(quan->quantScale()) < 1e-6); + if (quan->readType() != 0 || oldType4) { + result->asymmetric = true; + outputCount = result->alpha.size() / 2; + } else { + result->asymmetric = false; + outputCount = result->alpha.size(); // backward compability with previous symmetric quantization + } + if (result->asymmetric) { + // clampMin is minVal in asymmetric quant, clampMin = -(2^(bit)) + // and old version clampMin is -128 + float clampMin = quan->aMin() == 0 ? -128 : quan->aMin(); + for (int o = 0; o < outputCount; ++o) { + result->alpha.get()[2 * o] = result->alpha.get()[2 * o] - clampMin * result->alpha.get()[2 * o + 1]; + } + } + if (!quan->has_scaleInt()) { + float extraFactor = quan->quantScale(); + // for old type 4 models, their quan->quantScale is 0. which will introduce a bug here + if (oldType4) { + extraFactor = 1.0f; + } + for (int o=0; oalpha.size(); ++o) { + result->alpha.get()[o] *= extraFactor; + } + } + } if (forceInt8) { return result; } @@ -424,42 +508,30 @@ std::shared_ptr ConvolutionCommon::load(const IDS return nullptr; } int outputCount = 0; - bool oldType4 = (quan->type() == 4 && quan->aMin() == 0 && std::abs(quan->quantScale()) < 1e-6); - if (quan->readType() != 0 || oldType4) { - outputCount = result->alpha.size() / 2; + if (result->asymmetric) { + outputCount = result->alpha.size() / 2; } else { - outputCount = result->alpha.size(); // backward compability with previous symmetric quantization + outputCount = result->alpha.size(); } int partWeightSize = weightLength / outputCount; for (int o = 0; o < outputCount; ++o) { + float min = 0.0f; + float alpha = 0.0f; + if (result->asymmetric) { + min = result->alpha.get()[2*o]; + alpha = result->alpha.get()[2*o+1]; + } else { + alpha = result->alpha.get()[o]; + } auto dstW = result->weightFloat.get() + o * partWeightSize; auto srcW = result->weight.get() + o * partWeightSize; - float extraFactor = quan->quantScale(); - // for old type 4 models, their quan->quantScale is 0. which will introduce a bug here - if (oldType4) { - extraFactor = 1.0f; - } - if (result->alpha.size() == 2 * outputCount) { - float min = result->alpha.get()[2*o]; - float alpha = result->alpha.get()[2*o+1]; - // clampMin is minVal in asymmetric quant, clampMin = -(2^(bit)) - // and old version clampMin is -128 - float clampMin = quan->aMin() == 0 ? -128 : quan->aMin(); - for (int j = 0; j < partWeightSize; ++j) { - dstW[j] = (( (float)srcW[j] - clampMin ) * alpha + min) * extraFactor; - } - } else { - float alpha = result->alpha.get()[o]; - for (int j = 0; j < partWeightSize; ++j) { - dstW[j] = ((float)srcW[j]) * alpha * extraFactor; - } + for (int v=0; v < partWeightSize; ++v) { + dstW[v] = (float)srcW[v] * alpha + min; } } - result->weight.release(); result->alpha.release(); } - return result; } diff --git a/source/core/ConvolutionCommon.hpp b/source/core/ConvolutionCommon.hpp index 727d4544..62127f67 100644 --- a/source/core/ConvolutionCommon.hpp +++ b/source/core/ConvolutionCommon.hpp @@ -19,6 +19,10 @@ public: AutoStorage alpha; AutoStorage weightFloat; const IDSTQuan* quan; + bool asymmetric; + std::vector weightMap; + std::vector weightReverseMap; + bool canUseInt4 = false; }; static std::shared_ptr load(const IDSTQuan* quan, bool forceFloat = false, bool forceInt8 = false); static void getConvParameters(std::shared_ptr *quanCommon, const MNN::Convolution2D *conv2d, const float** originWeight, int* originWeightSize); diff --git a/source/core/Pipeline.cpp b/source/core/Pipeline.cpp index 9728e8bc..886b36c2 100644 --- a/source/core/Pipeline.cpp +++ b/source/core/Pipeline.cpp @@ -189,6 +189,7 @@ Pipeline::Pipeline(Schedule::PipelineInfo&& info, bool allocInput, bool outputSt #else { #endif + rt->onCheckInfo(info.first.info); mRuntime = rt; mCpuRuntime = cpuRt; mTuneAttr = tune; diff --git a/source/core/Session.cpp b/source/core/Session.cpp index 84e60260..6c1aa432 100644 --- a/source/core/Session.cpp +++ b/source/core/Session.cpp @@ -266,7 +266,16 @@ bool Session::getInfo(Interpreter::SessionInfoCode code, void* ptr) const { } else { *dst = 0; } + return true; } break; + case Interpreter::THREAD_NUMBER: { + auto dst = (int*)ptr; + if (mPipelines.empty()) { + break; + } + *dst = mPipelines[0]->getPipelineInfo().first.info.numThread; + return true; + } // TODO: Support other debug info default: break; diff --git a/source/core/TensorUtils.cpp b/source/core/TensorUtils.cpp index 506698e6..86d8414c 100644 --- a/source/core/TensorUtils.cpp +++ b/source/core/TensorUtils.cpp @@ -399,17 +399,21 @@ bool TensorUtils::isDepthToSpaceRegions(const Tensor* output) { } // compute offset through region -static inline int offsetCompute(Tensor::InsideDescribe::Region reg, int offset, bool backward) { +static inline int offsetCompute(const Tensor::InsideDescribe::Region& reg, int offset, bool backward) { + Tensor::InsideDescribe::View src; + Tensor::InsideDescribe::View dst; if (backward) { - auto tmp = reg.src; - reg.src = reg.dst; - reg.dst = tmp; + src = reg.dst; + dst = reg.src; + } else { + src = reg.src; + dst = reg.dst; } int res = 0; for (int i = 0; i < 3; i++) { if (reg.size[i] > 1) { - res += offset / reg.src.stride[i] * reg.dst.stride[i]; - offset %= reg.src.stride[i]; + res += offset / src.stride[i] * dst.stride[i]; + offset %= src.stride[i]; } } return res; @@ -461,6 +465,23 @@ bool TensorUtils::refTensorContent(Tensor* dst, const Tensor* src) { return needMalloc; } +static bool _RegionValid(int* stride, int offset, int* size, int sizeNum, size_t limitSize) { + int maxOffset = offset; + int minOffset = offset; + // Check start and end + for (int i=0; i 0) { + maxOffset += (stride[i] * (size[i] - 1)); + } else { + minOffset += (stride[i] * (size[i] - 1)); + } + } + if (minOffset < 0 || maxOffset >= limitSize) { + return false; + } + return true; +} + // fuse srcRegion and dstRegion to dstRegion if return true bool TensorUtils::fuseRegion(Tensor::InsideDescribe::Region& srcReg, Tensor::InsideDescribe::Region& dstReg) { // src data isnot full data of dst @@ -573,6 +594,14 @@ bool TensorUtils::fuseRegion(Tensor::InsideDescribe::Region& srcReg, Tensor::Ins } // set final size and set expandIdx if expand val is 1 int expandIdx = -1; + int newSrcOffset = offsetCompute(srcReg, dstReg.src.offset - srcReg.dst.offset, true) + srcReg.src.offset; + if (nullptr != srcReg.origin) { + bool valid = _RegionValid(newSrc, newSrcOffset, dstSize, dstNum, TensorUtils::getRawSize(srcReg.origin)); + if (!valid) { + // Exceed src range + return false; + } + } if (dstNum > sizeNum) { for (int i = 2; i >= 0; i--) { if (i < dstNum) { @@ -654,7 +683,7 @@ bool TensorUtils::fuseRegion(Tensor::InsideDescribe::Region& srcReg, Tensor::Ins } } dstReg.origin = srcReg.origin; - dstReg.src.offset = offsetCompute(srcReg, dstReg.src.offset - srcReg.dst.offset, true) + srcReg.src.offset; + dstReg.src.offset = newSrcOffset; return true; } void TensorUtils::adjustTensorForCompability(Tensor* newTensor) { @@ -680,70 +709,6 @@ Tensor::DimensionType TensorUtils::getDimType(const Tensor* t) { return Tensor::TENSORFLOW; } -halide_type_t TensorUtils::DataTypeToHalideType(DataType t) { - switch (t) { - case DataType_DT_DOUBLE: - case DataType_DT_FLOAT: - return halide_type_of(); - case DataType_DT_BFLOAT16: - return halide_type_t(halide_type_float, 16); - case DataType_DT_QINT32: - case DataType_DT_INT32: - case DataType_DT_BOOL: - case DataType_DT_INT64: - return halide_type_of(); - case DataType_DT_QINT8: - case DataType_DT_INT8: - return halide_type_of(); - case DataType_DT_QUINT8: - case DataType_DT_UINT8: - return halide_type_of(); - case DataType_DT_QUINT16: - case DataType_DT_UINT16: - return halide_type_of(); - case DataType_DT_QINT16: - case DataType_DT_INT16: - return halide_type_of(); - case DataType_DT_STRING: - default: - MNN_PRINT("Unsupported data type!"); - MNN_ASSERT(false); - return halide_type_of(); - } -} - -DataType TensorUtils::HaildeTypeToDataType(halide_type_t t) { - if (t == halide_type_of()) { - return DataType_DT_INT8; - } - if (t == halide_type_of()) { - return DataType_DT_INT16; - } - if (t == halide_type_of()) { - return DataType_DT_INT32; - } - if (t == halide_type_of()) { - return DataType_DT_INT64; - } - if (t == halide_type_of()) { - return DataType_DT_UINT8; - } - if (t == halide_type_of()) { - return DataType_DT_UINT16; - } - if (t == halide_type_t(halide_type_float, 16)) { - return DataType_DT_BFLOAT16; - } - if (t == halide_type_of()) { - return DataType_DT_FLOAT; - } - if (t == halide_type_of()) { - return DataType_DT_DOUBLE; - } - MNN_PRINT("Unsupported data type!"); - MNN_ASSERT(false); - return DataType_DT_INVALID; -} std::vector TensorUtils::getQuantInfo(const Tensor* t) { float scale = getDescribe(t)->quantAttr ? getDescribe(t)->quantAttr->scale : 0.0f; float zero = getDescribe(t)->quantAttr ? getDescribe(t)->quantAttr->zero : 0.0f; diff --git a/source/core/TensorUtils.hpp b/source/core/TensorUtils.hpp index be351242..5b13b2ca 100644 --- a/source/core/TensorUtils.hpp +++ b/source/core/TensorUtils.hpp @@ -163,8 +163,6 @@ public: static bool fuseRegion(Tensor::InsideDescribe::Region& srcReg, Tensor::InsideDescribe::Region& dstReg); static void adjustTensorForCompability(Tensor* t); static Tensor::DimensionType getDimType(const Tensor* t); - static halide_type_t DataTypeToHalideType(DataType t); - static DataType HaildeTypeToDataType(halide_type_t t); static std::vector getQuantInfo(const Tensor* t); static size_t getRawSize(const Tensor* t); diff --git a/source/geometry/GeometryPermute.cpp b/source/geometry/GeometryPermute.cpp index 41ab98e5..1d92bc06 100644 --- a/source/geometry/GeometryPermute.cpp +++ b/source/geometry/GeometryPermute.cpp @@ -6,6 +6,7 @@ // Copyright © 2018, Alibaba Group Holding Limited // +#include #include "geometry/GeometryComputer.hpp" #include "core/TensorUtils.hpp" namespace MNN { @@ -21,7 +22,6 @@ public: MNN_ASSERT(input->dimensions() >= 1); MNN_ASSERT(output->dimensions() == input->dimensions()); auto originTensor = input; - int basicOffset = 0; int shape[MNN_MAX_TENSOR_DIM]; if (op->type() == OpType_Permute) { auto shapeValue = op->main_as_Permute()->dims(); @@ -53,6 +53,7 @@ public: continue; } if (axis - preAxis == 1) { + // Fuse dimension if possible inputShape[inputShapeSize - 1] *= len; } else { if (preAxis >= 0) { @@ -89,7 +90,18 @@ public: stride *= inputShape[i]; } } - int basicStride = 1; + // Sort inputShapeSize from small to large + if (inputShapeSize > 3) { + for (int i=0; i inputShape[j]) { + std::swap(inputShape[i], inputShape[j]); + std::swap(inputStrides[i], inputStrides[j]); + std::swap(outputStrides[i], outputStrides[j]); + } + } + } + } // Compute inside, outside, axis int inside = 1; int insideStride = 0; @@ -99,18 +111,24 @@ public: int axisStride = 0; int breakAxis = -1; int remainSize = 1; + int outputInsideStride = 0; + int outputAxisStride = 0; + int outputOutsideStride = 0; { if (inputShapeSize >= 1) { inside = inputShape[inputShapeSize-1]; insideStride = inputStrides[inputShapeSize-1]; + outputInsideStride = outputStrides[inputShapeSize-1]; } if (inputShapeSize >= 2) { axis = inputShape[inputShapeSize-2]; axisStride = inputStrides[inputShapeSize-2]; + outputAxisStride = outputStrides[inputShapeSize-2]; } if (inputShapeSize >= 3) { outside = inputShape[inputShapeSize-3]; outsideStride = inputStrides[inputShapeSize-3]; + outputOutsideStride = outputStrides[inputShapeSize-3]; breakAxis = inputShapeSize - 3; for (int i = 0; i < inputShapeSize - 3; ++i) { remainSize *= inputShape[i]; @@ -130,24 +148,26 @@ public: for (int indice = 0; indice < remainSize; ++indice) { int value = indice; int inputOffset = 0; + int outputOffset = 0; for (int i = 0; i < breakAxis; ++i) { auto coordinate = value / mod[i]; inputOffset += coordinate * inputStrides[i]; + outputOffset += coordinate * outputStrides[i]; value = value % mod[i]; } Tensor::InsideDescribe::Region& slice = outputDes->regions[indice]; - slice.src.offset = inputOffset + basicOffset; - slice.src.stride[0] = outsideStride * basicStride; + slice.src.offset = inputOffset; + slice.src.stride[0] = outsideStride; slice.size[0] = outside; - slice.src.stride[1] = axisStride * basicStride; + slice.src.stride[1] = axisStride; slice.size[1] = axis; - slice.src.stride[2] = insideStride * basicStride; + slice.src.stride[2] = insideStride; slice.size[2] = inside; slice.origin = originTensor; - slice.dst.offset = indice * outside * axis * inside; - slice.dst.stride[0] = axis * inside; - slice.dst.stride[1] = inside; - slice.dst.stride[2] = 1; + slice.dst.offset = outputOffset; + slice.dst.stride[0] = outputOutsideStride; + slice.dst.stride[1] = outputAxisStride; + slice.dst.stride[2] = outputInsideStride; } return true; } diff --git a/test/core/RegionFuse.cpp b/test/core/RegionFuse.cpp index 5fcb7bfb..9cabc388 100644 --- a/test/core/RegionFuse.cpp +++ b/test/core/RegionFuse.cpp @@ -67,6 +67,8 @@ public: }; for (int i = 0; i < N; i++) { Region src, dst; + src.origin = nullptr; + dst.origin = nullptr; ::memcpy(&src, data[3 * i], 44); ::memcpy(&dst, data[3 * i + 1], 44); bool fused = TensorUtils::fuseRegion(src, dst); diff --git a/test/expr/ExecutorResetTest.cpp b/test/expr/ExecutorResetTest.cpp index 08da8bd2..77ced78e 100644 --- a/test/expr/ExecutorResetTest.cpp +++ b/test/expr/ExecutorResetTest.cpp @@ -68,6 +68,7 @@ public: } virtual bool run(int precision) { + int numberThread = 0; MNN::BackendConfig bnConfig; auto exe = Executor::newExecutor(MNN_FORWARD_CPU, bnConfig, 1); ExecutorScope scope(exe); @@ -77,10 +78,31 @@ public: auto y = _ReduceSum(_Multiply(x, x), {}); ::memset(x->writeMap(), 0, x->getInfo()->size * sizeof(float)); y->readMap(); + auto res = Executor::getComputeInfo(y->expr().first, MNN::Interpreter::THREAD_NUMBER, &numberThread); + if (numberThread != 4 || res == false) { + FUNC_PRINT(1); + return false; + } exe->setGlobalExecutorConfig(MNN_FORWARD_CPU, bnConfig, 4); ::memset(x->writeMap(), 0, x->getInfo()->size * sizeof(float)); y->readMap(); + res = Executor::getComputeInfo(y->expr().first, MNN::Interpreter::THREAD_NUMBER, &numberThread); + if (numberThread != 4 || res == false) { + FUNC_PRINT(1); + return false; + } + exe->setGlobalExecutorConfig(MNN_FORWARD_CPU, bnConfig, 1); + // Reset x, y + x = _Input({1, 3, 224, 224}, NC4HW4); + y = _ReduceSum(_Multiply(x, x), {}); + ::memset(x->writeMap(), 0, x->getInfo()->size * sizeof(float)); + y->readMap(); + res = Executor::getComputeInfo(y->expr().first, MNN::Interpreter::THREAD_NUMBER, &numberThread); + if (numberThread != 1 || res == false) { + FUNC_PRINT(1); + return false; + } return true; } }; diff --git a/test/expr/ModuleTest.cpp b/test/expr/ModuleTest.cpp index d3422f37..c6bd63cb 100644 --- a/test/expr/ModuleTest.cpp +++ b/test/expr/ModuleTest.cpp @@ -689,9 +689,18 @@ public: auto bufferOutput = builderOutput.GetBufferPointer(); std::shared_ptr net(Interpreter::createFromBuffer((void*)bufferOutput, sizeOutput), Interpreter::destroy); ScheduleConfig config; + config.numThread = 1; + int runTime = 5; + auto s0 = net->createSession(config); + { + AUTOTIME; + for (int t = 0; t < runTime; ++t) { + net->runSession(s0); + } + } + net->releaseSession(s0); config.numThread = 4; auto s1 = net->createSession(config); - int runTime = 10; { AUTOTIME; for (int t = 0; t < runTime; ++t) { @@ -699,7 +708,6 @@ public: } } net->releaseSession(s1); - net = nullptr; std::vector allThreads; for (int i = 0; i < 4; ++i) { allThreads.emplace_back(std::thread([runTime, i, bufferOutput, sizeOutput] { @@ -722,6 +730,31 @@ public: for (auto& t : allThreads) { t.join(); } + for (int i=0; i<3; ++i) { + auto rt = Interpreter::createRuntime({config}); + auto s0 = net->createSession(config, rt); + auto s1 = net->createSession(config, rt); + int numberThread = 0; + net->getSessionInfo(s0, MNN::Interpreter::THREAD_NUMBER, &numberThread); + if (numberThread != 4) { + FUNC_PRINT(i); + return false; + } + net->getSessionInfo(s1, MNN::Interpreter::THREAD_NUMBER, &numberThread); + if (numberThread != 4) { + FUNC_PRINT(i); + return false; + } + { + AUTOTIME; + for (int t = 0; t < runTime; ++t) { + net->runSession(s0); + } + } + net->releaseSession(s0); + net->releaseSession(s1); + } + return true; } virtual bool run(int precision) { diff --git a/test/main.cpp b/test/main.cpp index e5d5b4b2..c595b405 100644 --- a/test/main.cpp +++ b/test/main.cpp @@ -42,6 +42,7 @@ int main(int argc, char* argv[]) { MNN::BackendConfig config; config.precision = (MNN::BackendConfig::PrecisionMode)precision; MNN::Express::Executor::getGlobalExecutor()->setGlobalExecutorConfig(type, config, thread); + FUNC_PRINT(thread); precisionInTestUtil = getTestPrecision(type, config.precision, MNN::Express::Executor::getGlobalExecutor()->getCurrentRuntimeStatus(MNN::STATUS_SUPPORT_FP16)); MNN_PRINT("After update, precision in TestUtil:%d\n", precisionInTestUtil); } diff --git a/test/op/BatchMatMulTest.cpp b/test/op/BatchMatMulTest.cpp index 8dad2a8a..0f79af2a 100644 --- a/test/op/BatchMatMulTest.cpp +++ b/test/op/BatchMatMulTest.cpp @@ -19,7 +19,8 @@ static void fillFloat(float* dst, int h, int w, ConvertFP32 functor, float offse for (int y = 0; y < h; ++y) { auto dstY = dst + w * y; for (int x = 0; x < w; ++x) { - dstY[x] = functor((float)x * 0.1f + (float)y + offset); + int temp = (x + y) % 31; + dstY[x] = functor(((float)temp + offset) * 0.01f); } } } @@ -38,7 +39,7 @@ static bool checkMatMul(const float* C, const float* A, const float* B, int e, i } expected = functor(expected); auto diff = fabsf(expected - computed); - if (diff > 0.1f) { + if (diff / fabsf(expected) > 0.005f) { MNN_PRINT("%f -> %f\n", expected, computed); res = false; } @@ -270,6 +271,50 @@ public: } } } + // BatchMatMul batch = 1 with large K + { + std::vector> values = { + {16, 262144, 15}, + {3, 262144, 16} + }; + for(auto value : values) { + e = value[0]; + l = value[1]; + h = value[2]; + + std::unique_ptr op(new MNN::OpT); + op->type = MNN::OpType_BatchMatMul; + op->main.type = MNN::OpParameter_BatchMatMulParam; + op->main.value = new MNN::BatchMatMulParamT; + auto param = op->main.AsBatchMatMulParam(); + param->adjX = false; + param->adjY = true; + + int batch = 1; + auto x0 = _Input({}, NHWC, halide_type_of()); + auto x1 = _Input({}, NHWC, halide_type_of()); + x0->resize({batch, h, l}); + x1->resize({batch, l, e}); + auto x0Ptr = x0->writeMap(); + auto x1Ptr = x1->writeMap(); + for (int b = 0; b < batch; ++b) { + fillFloat(x0Ptr + b * h * l, h, l, FP32Converter[precision], (float)b * 10); + fillFloat(x1Ptr + b * e * l, l, e, FP32Converter[precision], (float)b * 10); + } + auto tranposeB = _Transpose(x1, {0, 2, 1}); + auto y = Variable::create(Expr::create(op.get(), {x0, tranposeB})); + + auto yPtr = y->readMap(); + for (int b = 0; b < batch; ++b) { + auto res = checkMatMul(yPtr + b * e * h, x0Ptr + b * h * l, x1Ptr + b * e * l, e, l, h, FP32Converter[precision]); + if (!res) { + FUNC_PRINT(1); + return false; + } + } + } + + } return true; } }; diff --git a/test/op/BinaryOPTest.cpp b/test/op/BinaryOPTest.cpp index d71b8588..2052d4ce 100644 --- a/test/op/BinaryOPTest.cpp +++ b/test/op/BinaryOPTest.cpp @@ -71,6 +71,7 @@ protected: for (int i = 0; i < size_out; ++i) { auto error = (int32_t)data_out[i] - (int32_t)gotOutput[i]; if (error * error > 1) { + MNN_PRINT("Error case = %d:\n", i); MNN_PRINT("%s Test error: compute result=%d, right value=%d\n", name.c_str(), (int32_t)gotOutput[i], (int32_t)data_out[i]); return false; } @@ -88,7 +89,7 @@ class AddTest : public BinaryTestCommon { public: virtual ~AddTest() = default; virtual bool run(int precision) { - return test(_Add, "AddTest", 0.01, + return test(MNN::Express::_Add, "AddTest", 0.01, {-1.0, -2.0, -3.0, -4.0}, {1.0, 2.0, 3.0, 4.0}, {0.0, 0.0, 0.0, 0.0}, {4}, {4}, {4}); } @@ -101,7 +102,7 @@ class AddInt8Test : public BinaryTestCommon { vector inp2 = {1.1, 2.2, 3.3, 4.6}, inp1 = {2}; vector rightResult = {3.1, 4.2, 5.3, 6.6}; - return test(_Add, "AddInt8Test", 0.01, inp1, inp2, rightResult, {1}, {4}, {4}, {0.4, 0.4, 0.4}, + return test(MNN::Express::_Add, "AddInt8Test", 0.01, inp1, inp2, rightResult, {1}, {4}, {4}, {0.4, 0.4, 0.4}, {0., 0., 0.}); } }; @@ -110,7 +111,7 @@ class SubtractTest : public BinaryTestCommon { public: virtual ~SubtractTest() = default; virtual bool run(int precision) { - return test(_Subtract, "SubtractTest", 0.01, + return test(MNN::Express::_Subtract, "SubtractTest", 0.01, {-1.0, -2.0, -3.0, -4.0}, {1.0, 2.0, 3.0, 4.0}, {-2.0, -4.0, -6.0, -8.0}, {4}, {4}, {4}); } @@ -119,11 +120,11 @@ class SubtractInt8Test : public BinaryTestCommon { public: virtual ~SubtractInt8Test() = default; virtual bool run(int precision) { - vector inp1 = {1.1, 2.2, 3.3, 4.6, 1.1, 2.2, 3.3, 4.6,1.1, 2.2, 3.3, 4.6,1.1, 2.2, 3.3, 4.6}, inp2 = {5.7}; - vector rightResult = {-4.6, -3.5, -2.4, -1.1, -4.6, -3.5, -2.4, -1.1, -4.6, -3.5, -2.4, + vector inp1 = {7.0, 28.2, 3.3, 4.6, 1.1, 2.2, 3.3, 4.6,1.1, 2.2, 3.3, 4.6,1.1, 2.2, 3.3, 4.6}, inp2 = {5.7}; + vector rightResult = {1.3, 22.5, -2.4, -1.1, -4.6, -3.5, -2.4, -1.1, -4.6, -3.5, -2.4, -1.1, -4.6, -3.5, -2.4, -1.1}; - return test(_Subtract, "SubtractInt8Test", 0.01, inp1, inp2, rightResult, + return test(MNN::Express::_Subtract, "SubtractInt8Test", 0.01, inp1, inp2, rightResult, {4, 4}, {1}, {4, 4}, {0.4, 0.4, 0.4}, {0., 0., 0.}); } }; @@ -132,7 +133,7 @@ class MultiplyTest : public BinaryTestCommon { public: virtual ~MultiplyTest() = default; virtual bool run(int precision) { - return test(_Multiply, "MultiplyTest", 0.01, + return test(MNN::Express::_Multiply, "MultiplyTest", 0.01, {-1.0, -2.0, -3.0, -4.0}, {1.0, 2.0, 3.0, 4.0}, {-1.0, -4.0, -9.0, -16.0}, {4}, {4}, {4}); } @@ -143,7 +144,7 @@ public: virtual bool run(int precision) { vector inp1 = {1.1, 2.2, 3.3, 4.6}, inp2 = {5.7, 2.5, 0.25, 0.43}; vector rightResult = {6.27 , 5.5 , 0.825, 1.978}; - return test(_Multiply, "MultiplyInt8Test", 0.01, inp1, inp2, rightResult, + return test(MNN::Express::_Multiply, "MultiplyInt8Test", 0.01, inp1, inp2, rightResult, {4}, {4}, {4}, {0.4, 0.4, 0.16}, {0., 0., 0.}); } }; @@ -152,7 +153,7 @@ class DivideTest : public BinaryTestCommon { public: virtual ~DivideTest() = default; virtual bool run(int precision) { - return test(_Divide, "DivideTest", 0.01, + return test(MNN::Express::_Divide, "DivideTest", 0.01, {-1.0, -2.0, -3.0, -4.0}, {2.0, 4.0, 6.0, 8.0}, {-0.5, -0.5, -0.5, -0.5}, {4}, {4}, {4}); } @@ -163,7 +164,7 @@ public: virtual bool run(int precision) { vector inp1 = {1.1, 2.2, 3.3, 4.6}, inp2 = {5.7, 2.5, 2.6, 1.88}; vector rightResult = {0.19298, 0.88, 1.269, 2.4468}; - return test(_Divide, "DivideInt8Test", 0.01, inp1, inp2, rightResult, + return test(MNN::Express::_Divide, "DivideInt8Test", 0.01, inp1, inp2, rightResult, {4}, {4}, {4}, {0.4, 0.4, 1.0}, {0., 0., 0.}); } }; @@ -173,7 +174,7 @@ public: virtual ~PowTest() = default; virtual bool run(int precision) { float errorScale = precision <= MNN::BackendConfig::Precision_High ? 1 : 10; - return test(_Pow, "PowTest", 0.01 * errorScale, + return test(MNN::Express::_Pow, "PowTest", 0.01 * errorScale, {-1.0, -2.0, -3.0, -4.0}, {2.0, 4.0, 6.0, 4.0}, {1.0, 16.0, 729.0, 256.0}, {4}, {4}, {4}); } @@ -182,10 +183,10 @@ class PowInt8Test : public BinaryTestCommon { public: virtual ~PowInt8Test() = default; virtual bool run(int precision) { - vector inp1 = {-1.0, -2.0, -3.0, -4.0}, inp2 = {2.0, 4.0, 2, 4.0}; - vector rightResult = {1, 16, 8, 0}; - return test(_Pow, "PowInt8Test", 0.01, inp1, inp2, rightResult, - {4}, {4}, {4}, {1.0, 1.0, 1.0}, {0., 0., 0.}); + vector inp1 = {-1.0, -2.0, -3.0, -4.0}, inp2 = {2.0, 4.0, 3, 4.0}; + vector rightResult = {1, 16, -27.0, 256}; + return test(MNN::Express::_Pow, "PowInt8Test", 0.01, inp1, inp2, rightResult, + {4}, {4}, {4}, {1.0, 1.0, 3.0}, {0., 0., 0.}); } }; @@ -193,7 +194,7 @@ class MinimumTest : public BinaryTestCommon { public: virtual ~MinimumTest() = default; virtual bool run(int precision) { - return test(_Minimum, "MinimumTest", 0.01, + return test(MNN::Express::_Minimum, "MinimumTest", 0.01, {-1.0, -2.0, -3.0, -4.0}, {1.0, 2.0, 3.0, 4.0}, {-1.0, -2.0, -3.0, -4.0}, {4}, {4}, {4}); } @@ -204,7 +205,7 @@ public: virtual bool run(int precision) { vector inp1 = {-1.2, -5.0, 8, 10}, inp2 = {9.3, 3.1, 11.0, 2.9}; vector rightResult = {-1.2, -5.0, 8, 2.9}; - return test(_Minimum, "MinimumInt8Test", 0.01, inp1, inp2, rightResult, + return test(MNN::Express::_Minimum, "MinimumInt8Test", 0.01, inp1, inp2, rightResult, {4}, {4}, {4}, {0.4, 0.4, 0.4}, {0., 0., 0.}); } }; @@ -224,7 +225,7 @@ public: virtual bool run(int precision) { vector inp1 = {-1, -5, 8, 10}, inp2 = {9}; vector rightResult = {9, 9, 9, 10}; - return test(_Maximum, "MaximumInt8Test", 0.01, inp1, inp2, rightResult, + return test(MNN::Express::_Maximum, "MaximumInt8Test", 0.01, inp1, inp2, rightResult, {4}, {1}, {4}, {0.4, 0.4, 0.4}, {0., 0., 0.}); } }; @@ -233,7 +234,7 @@ class BiasAddTest : public BinaryTestCommon { public: virtual ~BiasAddTest() = default; virtual bool run(int precision) { - return test(_BiasAdd, "BiasAddTest", 0.01, + return test(MNN::Express::_BiasAdd, "BiasAddTest", 0.01, {-1.0, -2.0, -3.0, -4.0, -5.0, -6.0, -7.0, -8.0}, {1.0, 2.0}, {0.0, 0.0, -2.0, -2.0, -4.0, -4.0, -6.0, -6.0}, @@ -244,7 +245,7 @@ class GreaterTest : public BinaryTestCommon { public: virtual ~GreaterTest() = default; virtual bool run(int precision) { - return test(_Greater, "GreaterTest", 0, + return test(MNN::Express::_Greater, "GreaterTest", 0, {1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0}, {3.0, 4.0}, {0, 0, 0, 0, 1, 1, 1, 1}, @@ -255,7 +256,7 @@ class GreaterEqualTest : public BinaryTestCommon { public: virtual ~GreaterEqualTest() = default; virtual bool run(int precision) { - return test(_GreaterEqual, "GreaterEqualTest", 0, + return test(MNN::Express::_GreaterEqual, "GreaterEqualTest", 0, {1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0}, {3.0, 4.0}, {0, 0, 1, 1, 1, 1, 1, 1}, @@ -266,7 +267,7 @@ class LessTest : public BinaryTestCommon { public: virtual ~LessTest() = default; virtual bool run(int precision) { - return test(_Less, "LessTest", 0, + return test(MNN::Express::_Less, "LessTest", 0, {1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0}, {3.0, 4.0}, {1, 1, 0, 0, 0, 0, 0, 0}, @@ -277,7 +278,7 @@ class FloorDivTest : public BinaryTestCommon { public: virtual ~FloorDivTest() = default; virtual bool run(int precision) { - return test(_FloorDiv, "FloorDivTest", 0.01, + return test(MNN::Express::_FloorDiv, "FloorDivTest", 0.01, {-1.0, -2.0, -3.0, -4.0, 5.0, 6.0, 7.0, 8.1}, {3.0, 4.0}, {-1.0, -1.0, -1.0, -1.0, 1.0, 1.0, 2.0, 2.0}, @@ -290,7 +291,7 @@ public: virtual bool run(int precision) { vector inp1 = {-3.98, 17.5, 25.4, 6.7}, inp2 = {3}; vector rightResult = {-2, 5, 8, 2}; - return test(_FloorDiv, "FloorDivInt8Test", 0.01, inp1, inp2, rightResult, + return test(MNN::Express::_FloorDiv, "FloorDivInt8Test", 0.01, inp1, inp2, rightResult, {4}, {1}, {4}, {0.4, 0.4, 1}, {0., 0., 0.}); } }; @@ -327,7 +328,7 @@ public: z[i + j * 2] = FP32Converter[precision](fmodf(FP32Converter[precision](x[i+j*2]), FP32Converter[precision](y[i]))); } } - return test(_Mod, "ModTestFloat", 0, + return test(MNN::Express::_Mod, "ModTestFloat", 0, x,y,z, {4, 2}, {2}, {4, 2}); } @@ -336,7 +337,7 @@ class SquaredDifferenceTest : public BinaryTestCommon { public: virtual ~SquaredDifferenceTest() = default; virtual bool run(int precision) { - return test(_SquaredDifference, "SquaredDifferenceTest", 0.01, + return test(MNN::Express::_SquaredDifference, "SquaredDifferenceTest", 0.01, {-1.0, -2.0, -3.0, -4.0, 5.0, 6.0, 7.0, 8.001}, {3.0, 4.0}, {16.0, 36.0, 36.0, 64.0, 4.0, 4.0, 16.0, 16.0}, @@ -349,7 +350,7 @@ public: virtual bool run(int precision) { vector inp1 = {-1, -2, -3, -4, 5, 6, 7, 8, -1, -2, -3, -4, 5, 6, 7, 8, -1, -2, -3, -4, 5, 6, 7, 8, -1, -2, -3, -4, 5, 6, 7, 8}, inp2 = {3}; vector rightResult = {16, 25, 36, 49, 4, 9, 16, 25, 16, 25, 36, 49, 4, 9, 16, 25, 16, 25, 36, 49, 4, 9, 16, 25, 16, 25, 36, 49, 4, 9, 16, 25}; - return test(_SquaredDifference, "SquaredDifferenceInt8Test", 0.01, inp1, inp2, rightResult, + return test(MNN::Express::_SquaredDifference, "SquaredDifferenceInt8Test", 0.01, inp1, inp2, rightResult, {8, 4}, {1}, {8, 4}, {1, 1, 1}, {0., 0., 0.}); } }; @@ -358,7 +359,7 @@ class EqualTest : public BinaryTestCommon { public: virtual ~EqualTest() = default; virtual bool run(int precision) { - return test(_Equal, "EqualTest", 0, + return test(MNN::Express::_Equal, "EqualTest", 0, {1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0}, {3.0, 4.0}, {0, 0, 1, 1, 0, 0, 0, 0}, @@ -380,7 +381,7 @@ class FloorModTest : public BinaryTestCommon { public: virtual ~FloorModTest() = default; virtual bool run(int precision) { - return test(_FloorMod, "FloorModTest", 0.01, + return test(MNN::Express::_FloorMod, "FloorModTest", 0.01, {-1.0f, -2.0f, -3.0f, -4.0f, 5.0f, 6.0f, 7.0f, 8.1f}, {3.0f, 4.0f}, {2.0f, 2.0f, 0.0f, 0.0f, 2.0f, 2.0f, 1.0f, 0.1f}, @@ -391,7 +392,7 @@ class FloorModInt8Test : public BinaryTestCommon { public: virtual ~FloorModInt8Test() = default; virtual bool run(int precision) { - return test(_FloorMod, "FloorModInt8Test", 0.01, + return test(MNN::Express::_FloorMod, "FloorModInt8Test", 0.01, {-1, -3, 5, 7}, {3.0f}, {2, 0, 2, 1}, {4}, {1}, {4}, {0.3, 0.3, 0.3}, {0., 0., 0.}); @@ -401,7 +402,7 @@ class Atan2Test : public BinaryTestCommon { public: virtual ~Atan2Test() = default; virtual bool run(int precision) { - return test(_Atan2, "Atan2Test", 0.01, + return test(MNN::Express::_Atan2, "Atan2Test", 0.01, {-1.0, -2.0, -3.0, -4.0, 5.0, 6.0, 7.0, 8.0}, {3.0, -4.0}, {-0.32175055, -2.67794504, -0.7853982, -2.35619449, 1.0303768, 2.15879893, 1.1659045, 2.03444394}, @@ -412,7 +413,7 @@ class Atan2Int8Test : public BinaryTestCommon { public: virtual ~Atan2Int8Test() = default; virtual bool run(int precision) { - return test(_Atan2, "Atan2Int8Test", 0.01, + return test(MNN::Express::_Atan2, "Atan2Int8Test", 0.01, {-1, -3, 5, 7}, {3}, {-1, 0, 2, 1}, {4}, {1}, {4}, {1, 1, 1}, {0., 0., 0.}); @@ -523,7 +524,7 @@ public: virtual bool run(int precision) { vector data_x(8, 1), data_y(8, 1), data_out(64, 2); vector shape_x = {4, 1, 2, 1}, shape_y = {2, 1, 4}, shape_out = {4, 2, 2, 4}; - return test(_Add, "BinaryBroadcastShapeTest", 0, + return test(MNN::Express::_Add, "BinaryBroadcastShapeTest", 0, data_x, data_y, data_out, shape_x, shape_y, shape_out); } }; @@ -546,7 +547,7 @@ public: data_out[j + i * 560] = func(data_x[j] - data_y[j + i * 560]); } } - return test(_Subtract, "SubtractBroastTest", 0.01, + return test(MNN::Express::_Subtract, "SubtractBroastTest", 0.01, data_x, data_y, data_out, shape_x, shape_y, shape_out); } }; diff --git a/test/op/ConvolutionTest.cpp b/test/op/ConvolutionTest.cpp index 867ee80c..56c24384 100644 --- a/test/op/ConvolutionTest.cpp +++ b/test/op/ConvolutionTest.cpp @@ -212,9 +212,13 @@ VARP _Conv(std::vector&& weight, std::vector&& bias, VARP x, INTS conv2D->common->kernelY = kernelSize[1]; conv2D->common->relu6 = relu6; conv2D->common->relu = relu; + MNN_ASSERT(weight.size() == channel[1] * (channel[0] / group) * kernelSize[0] * kernelSize[1]); + conv2D->weight = std::move(weight); + MNN_ASSERT(bias.size() == channel[1]); + conv2D->bias = std::move(bias); if (sparese) { size_t weightNNZElement, weightBlockNumber = 0; - CommonCompute::statisticWeightSparsity(weightNNZElement, weightBlockNumber, weight.data(), bias.size(), weight.size() / bias.size(), sparseBlockOC); + CommonCompute::statisticWeightSparsity(weightNNZElement, weightBlockNumber, conv2D->weight.data(), conv2D->bias.size(), conv2D->weight.size() / conv2D->bias.size(), sparseBlockOC); std::unique_ptr arg1(new MNN::AttributeT); arg1->key = "sparseBlockOC"; @@ -250,11 +254,8 @@ VARP _Conv(std::vector&& weight, std::vector&& bias, VARP x, INTS auto sparseComPtr = flatbuffers::GetRoot(builder.GetBufferPointer())->UnPack(); conv2D->sparseParameter.reset(sparseComPtr); + CommonCompute::compressFloatWeightToSparse(convOp.get()); } - MNN_ASSERT(weight.size() == channel[1] * (channel[0] / group) * kernelSize[0] * kernelSize[1]); - conv2D->weight = std::move(weight); - MNN_ASSERT(bias.size() == channel[1]); - conv2D->bias = std::move(bias); return (Variable::create(Expr::create(convOp.get(), {x}))); } diff --git a/tools/converter/source/common/AddSparseInfo.cpp b/tools/converter/source/common/AddSparseInfo.cpp index 9c71f3a7..25df4cad 100644 --- a/tools/converter/source/common/AddSparseInfo.cpp +++ b/tools/converter/source/common/AddSparseInfo.cpp @@ -6,12 +6,22 @@ // Copyright © 2018, Alibaba Group Holding Limited // +#include #include "CommonUtils.hpp" #include "common/CommonCompute.hpp" #include "backend/cpu/compute/SparseConvolutionTiledExecutor.hpp" using namespace MNN; +static inline std::vector getSparsityThreshold() { + // sparsity threadhold values, when sparseblock is + // {0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16} + return {1.f, 0.6f, 0.5f, 0.4f, 0.3f, 0.3f, 0.3f, 0.3f, 0.3f, 0.3f, 0.3f, 0.3f, 0.3f, 0.3f, 0.3f, 0.3f, 0.3f}; +} +static bool inline shouldUseSparseConvolution(float sparsity, int sparseBlockOC) { + std::vector thresholds = getSparsityThreshold(); + return sparsity > thresholds[std::min(std::max(sparseBlockOC, 0), (int)thresholds.size() - 1)]; +} void AddSparseInfo(std::unique_ptr& op, Compression::Pipeline proto) { auto prune_algo_type = MNN::SparseAlgo_RANDOM; int sparseBlockOC = 1; @@ -41,10 +51,10 @@ void AddSparseInfo(std::unique_ptr& op, Compression::Pipeline proto) { size_t weightNNZElement, weightBlockNumber = 0; CommonCompute::statisticWeightSparsity(weightNNZElement, weightBlockNumber, param->weight.data(), biasSize, weightSize / biasSize, sparseBlockOC); float sparsity = 1. - double(weightNNZElement) / weightSize; - // MNN_PRINT(" opname [%s] sparsity is:%f\n", op->name.c_str(), sparsity); - if (!SparseConvolutionTiledExecutor::shouldUseSparseConvolution(sparsity, sparseBlockOC)) { + if (!shouldUseSparseConvolution(sparsity, sparseBlockOC)) { return; } + // MNN_PRINT(" opname [%s] sparsity is:%f, use sparse\n", op->name.c_str(), sparsity); MNN::AttributeT* arg1(new MNN::AttributeT); arg1->key = "sparseBlockOC"; @@ -74,6 +84,7 @@ void AddSparseInfo(std::unique_ptr& op, Compression::Pipeline proto) { argsVector.emplace_back(sparseArg3); argsVector.emplace_back(sparseArg4); + // sparseArgs need sorted table, can't use obj interface auto sparseArgs = builder.CreateVectorOfSortedTables(&argsVector); auto sparseCom = MNN::CreateSparseCommon(builder, prune_algo_type, sparseArgs); builder.Finish(sparseCom); @@ -81,6 +92,10 @@ void AddSparseInfo(std::unique_ptr& op, Compression::Pipeline proto) { param->sparseParameter.reset(sparseComPtr); + delete arg1; + delete arg2; + delete arg3; + delete arg4; break; } default: diff --git a/tools/converter/source/common/ChannelPruneConvert.cpp b/tools/converter/source/common/ChannelPruneConvert.cpp new file mode 100644 index 00000000..c784f38e --- /dev/null +++ b/tools/converter/source/common/ChannelPruneConvert.cpp @@ -0,0 +1,367 @@ +// +// ChannelPruneConvert.cpp +// MNNConverter +// +// Created by MNN on 2023/05/05. +// Copyright © 2018, Alibaba Group Holding Limited +// + +#include "CommonUtils.hpp" +#include "MNN/expr/ExprCreator.hpp" +#include +#include +#include +#include + +using namespace MNN; +using namespace MNN::Express; +using namespace std; + +// TODO: add more unsafe ops +static std::vector unSafeOpTypes = { + OpType_BroadcastTo, OpType_BatchToSpaceND, OpType_Concat, OpType_LSTM, OpType_LSTMBlockCell, OpType_Reshape, OpType_Resize, + OpType_RNN, OpType_RNNSequenceGRU, OpType_ScatterNd, OpType_Slice, OpType_SliceTf, OpType_SpaceToBatchND, OpType_Raster, +}; + +struct TensorMaskInfo { + std::vector mask; // per-channel 1 or 0 + std::string oriConvName; +}; + +std::vector findUserOps(int outputIndex, std::unique_ptr& netT, SubGraphProtoT* subgraph) { + std::vector userOps; + if (subgraph) { + for (auto& subOp : subgraph->nodes) { + for (int inputIndex : subOp->inputIndexes) { + if (inputIndex == outputIndex) { + userOps.push_back(subOp.get()); + } + } + } + } else { + for (auto& netOp : netT->oplists) { + for (int inputIndex : netOp->inputIndexes) { + if (inputIndex == outputIndex) { + userOps.push_back(netOp.get()); + } + } + } + } + + return userOps; +} + +// do the actual channel prune on weights and bias +void channelPrune(std::unique_ptr& op, std::unique_ptr& netT, SubGraphProtoT* subgraph, std::map& tensorMaskInfo) { + auto opType = op->type; + if (opType != OpType_Convolution && opType != OpType_ConvolutionDepthwise && opType != OpType_Deconvolution && opType != OpType_DeconvolutionDepthwise && opType != OpType_BatchNorm) { + return; + } + if (op->inputIndexes.size() != 1) { + return; + } + + int inputIndex = op->inputIndexes[0]; + int outputIndex = op->outputIndexes[0]; + std::string inputTensorName = subgraph ? subgraph->tensors[inputIndex] : netT->tensorName[inputIndex]; + std::string outputTensorName = subgraph ? subgraph->tensors[outputIndex] : netT->tensorName[outputIndex]; + + std::vector inputMask = tensorMaskInfo[inputTensorName].mask; + int inputMaskSum = 0; + for (int i = 0; i < inputMask.size(); i++) { + inputMaskSum += inputMask[i]; + } + + if (opType == OpType_BatchNorm) { + if (!(inputMaskSum < inputMask.size())) { + return; + } + + auto bnParams = op->main.AsBatchNorm(); + auto slopFloat = bnParams->slopeData; + auto biasFloat = bnParams->biasData; + auto meanFloat = bnParams->meanData; + auto varianceFloat = bnParams->varData; + + bnParams->slopeData.clear(); + bnParams->biasData.clear(); + bnParams->meanData.clear(); + bnParams->varData.clear(); + + for (int i = 0; i < varianceFloat.size(); i++) { + if (inputMask[i] == 1) { + bnParams->slopeData.push_back(slopFloat[i]); + bnParams->biasData.push_back(biasFloat[i]); + bnParams->meanData.push_back(meanFloat[i]); + bnParams->varData.push_back(varianceFloat[i]); + } + } + bnParams->channels = inputMaskSum; + + return; + } + + auto convParams = op->main.AsConvolution2D(); + auto weightFloat = convParams->weight; + auto biasFloat = convParams->bias; + auto& common = convParams->common; + + int ko = common->outputCount; + int ki = common->inputCount / common->group; + int kh = common->kernelY; + int kw = common->kernelX; + + std::vector opMask; + for (auto info : tensorMaskInfo) { + if (op->name == info.second.oriConvName) { + opMask = info.second.mask; + break; + } + } + + int opMaskSum = 0; + for (int i = 0; i < opMask.size(); i++) { + opMaskSum += opMask[i]; + } + + if (opMaskSum < opMask.size()) { + convParams->weight.clear(); + convParams->bias.clear(); + + for (int i = 0; i < ko; i++) { + int offset = i * ki * kh * kw; + if (opMask[i] == 1) { + for (int j = 0; j < ki * kh * kw; j++) { + convParams->weight.emplace_back(weightFloat[offset + j]); + } + convParams->bias.emplace_back(biasFloat[i]); + } + } + common->outputCount = opMaskSum; + } + + if (inputMaskSum < inputMask.size()) { + auto weightFloat = convParams->weight; + convParams->weight.clear(); + + int ko = common->outputCount; + int ki = common->inputCount / common->group; + int kh = common->kernelY; + int kw = common->kernelX; + + for (int i = 0; i < ko; i++) { + for (int j = 0; j < ki; j++) { + int offset = i * ki * kh * kw + j * kh * kw; + if (inputMask[j] == 1) { + for (int k = 0; k < kh * kw; k++) { + convParams->weight.emplace_back(weightFloat[offset + k]); + } + } + } + } + + common->inputCount = inputMaskSum; + + // we will not do prune for depthwise, its channel pruning only depends on its input tensor's pruning + if (opType == OpType_ConvolutionDepthwise || opType == OpType_DeconvolutionDepthwise) { + common->outputCount = inputMaskSum; + } + } +} + +// propagate and analyze prune mask info in model +void analyzePruneInfo(std::unique_ptr& op, std::unique_ptr& netT, SubGraphProtoT* subgraph, std::map& tensorMaskInfo, std::set& notSafeConvNames) { + auto opType = op->type; + auto inputIndices = op->inputIndexes; + if (inputIndices.size() == 0) { + return; + } + auto outputIndices = op->outputIndexes; + std::vector inputTensorNames; + for (int i = 0; i < inputIndices.size(); i++) { + inputTensorNames.push_back(subgraph ? subgraph->tensors[inputIndices[i]] : netT->tensorName[inputIndices[i]]); + } + std::vector outputTensorNames; + for (int i = 0; i < outputIndices.size(); i++) { + outputTensorNames.push_back(subgraph ? subgraph->tensors[outputIndices[i]] : netT->tensorName[outputIndices[i]]); + } + + if (opType == OpType_Convolution || opType == OpType_Deconvolution) { + if (inputIndices.size() == 1) { + auto convParams = op->main.AsConvolution2D(); + auto weightFloat = convParams->weight; + auto biasFloat = convParams->bias; + auto& common = convParams->common; + + const int ko = common->outputCount; + const int ki = common->inputCount / common->group; + const int kh = common->kernelY; + const int kw = common->kernelX; + + VARP weightVar = _Const(weightFloat.data(), {ko, ki, kh, kw}, NCHW); + + VARP weightMask = _Greater(_ReduceSum(_Abs(weightVar), {1, 2, 3}), _Scalar(1e-6)); + VARP maskSum = _ReduceSum(weightMask); + auto maskInfo = weightMask->getInfo(); + auto maskPtr = weightMask->readMap(); + + if (maskSum->readMap()[0] == maskInfo->size) { + return; + } + + // conv has pruned, propagate its mask down + tensorMaskInfo[outputTensorNames[0]].oriConvName = op->name; + for (int i = 0; i < maskInfo->size; i++) { + tensorMaskInfo[outputTensorNames[0]].mask.push_back(maskPtr[i]); + } + } + + return; + } + + std::vector::iterator iter; + iter = std::find(unSafeOpTypes.begin(), unSafeOpTypes.end(), opType); + // not safe op and num_outputs > 1 op are not safe + if ((iter != unSafeOpTypes.end()) || (outputTensorNames.size() > 1)) { + for (auto name : inputTensorNames) { + if (!tensorMaskInfo[name].oriConvName.empty()) { + // so that input tensor mask's oriConv op is not safe + notSafeConvNames.insert(tensorMaskInfo[name].oriConvName); + } + } + return; + } + + // when a mask is propagated to the output, its oriConv ops are not safe + std::vector userOps = findUserOps(outputIndices[0], netT, subgraph); + if (userOps.size() == 0) { + for (auto name : inputTensorNames) { + if (!tensorMaskInfo[name].oriConvName.empty()) { + notSafeConvNames.insert(tensorMaskInfo[name].oriConvName); + } + } + return; + } + + // if the op has more than one input (including const input) + // we need its input tensor's masks are all from one oriConv op + if (inputIndices.size() > 1) { + std::string oriConvName; + std::string oriTensorName; + for (auto name : inputTensorNames) { + if (!tensorMaskInfo[name].oriConvName.empty()) { + oriConvName = tensorMaskInfo[name].oriConvName; + oriTensorName = name; + } + } + if (oriConvName.empty()) { + return; + } + + // oriConvName is not empty + bool unsafe = false; + for (auto name : inputTensorNames) { + auto tOriName = tensorMaskInfo[name].oriConvName; + if ((tOriName != oriConvName) && (!tOriName.empty())) { + unsafe = true; + } + } + + // if unsafe, all its input tensor mask's oriConvs are not safe + if (unsafe) { + for (auto name : inputTensorNames) { + auto tOriName = tensorMaskInfo[name].oriConvName; + if (!tOriName.empty()) { + notSafeConvNames.insert(tOriName); + } + } + return; + } + + // if safe, propagate mask down + tensorMaskInfo[outputTensorNames[0]].oriConvName = oriConvName; + tensorMaskInfo[outputTensorNames[0]].mask = tensorMaskInfo[oriTensorName].mask; + return; + } + + // for 1 input and 1 output safe op, propagate mask down + tensorMaskInfo[outputTensorNames[0]].oriConvName = tensorMaskInfo[inputTensorNames[0]].oriConvName; + tensorMaskInfo[outputTensorNames[0]].mask = tensorMaskInfo[inputTensorNames[0]].mask; +} + +void channelPruneConvert(std::unique_ptr& netT, MNN::Compression::Pipeline proto) { + bool filterPruned = false; + for (const auto& algo : proto.algo()) { + if (algo.type() == Compression::CompressionAlgo::PRUNE) { + auto prune_type = algo.prune_params().type(); + auto prune_algo_type = MNN::SparseAlgo(prune_type); + if (prune_type == Compression::PruneParams_PruneType_FILTER) { + filterPruned = true; + break; + } + } + } + + if (!filterPruned) { + return; + } + + std::map netMaskInfo; + for (auto tensorName : netT->tensorName) { + netMaskInfo[tensorName] = TensorMaskInfo(); + } + + std::set notSafeConvNames; + for (auto& op : netT->oplists) { + analyzePruneInfo(op, netT, nullptr, netMaskInfo, notSafeConvNames); + } + + std::set::iterator iter; + if (!notSafeConvNames.empty()) { + for (auto& info : netMaskInfo) { + iter = std::find(notSafeConvNames.begin(), notSafeConvNames.end(), info.second.oriConvName); + if (iter != notSafeConvNames.end()) { + for (int i = 0; i < info.second.mask.size(); i++) { + if (info.second.mask[i] == 0) { + info.second.mask[i] = 1; + } + } + } + } + } + + for (auto& op : netT->oplists) { + channelPrune(op, netT, nullptr, netMaskInfo); + } + + + for (auto& subgraph : netT->subgraphs) { + std::map subgraphMaskInfo; + for (auto tensorName : subgraph->tensors) { + subgraphMaskInfo[tensorName] = TensorMaskInfo(); + } + + std::set notSafeConvNames; + for (auto& op : subgraph->nodes) { + analyzePruneInfo(op, netT, subgraph.get(), subgraphMaskInfo, notSafeConvNames); + } + + std::set::iterator iter; + if (!notSafeConvNames.empty()) { + for (auto& info : subgraphMaskInfo) { + iter = std::find(notSafeConvNames.begin(), notSafeConvNames.end(), info.second.oriConvName); + if (iter != notSafeConvNames.end()) { + for (int i = 0; i < info.second.mask.size(); i++) { + if (info.second.mask[i] == 0) { + info.second.mask[i] = 1; + } + } + } + } + } + + for (auto& op : subgraph->nodes) { + channelPrune(op, netT, subgraph.get(), subgraphMaskInfo); + } + } +} diff --git a/tools/converter/source/common/CommonUtils.hpp b/tools/converter/source/common/CommonUtils.hpp index a3af79c9..356e409b 100644 --- a/tools/converter/source/common/CommonUtils.hpp +++ b/tools/converter/source/common/CommonUtils.hpp @@ -24,5 +24,6 @@ void addSparseInfo(std::unique_ptr& netT, MNN::Compression::Pipeline void fullQuantAndCoding(std::unique_ptr& netT, MNN::Compression::Pipeline proto); void weightQuantAndCoding(std::unique_ptr& netT, const modelConfig& config); void addUUID(std::unique_ptr& netT, MNN::Compression::Pipeline proto); +void channelPruneConvert(std::unique_ptr& netT, MNN::Compression::Pipeline proto); #endif // COMMMON_UTILS_HPP diff --git a/tools/converter/source/common/WeightQuantAndCoding.cpp b/tools/converter/source/common/WeightQuantAndCoding.cpp index 9c427f77..ed1459d2 100644 --- a/tools/converter/source/common/WeightQuantAndCoding.cpp +++ b/tools/converter/source/common/WeightQuantAndCoding.cpp @@ -7,6 +7,7 @@ // #include "CommonUtils.hpp" +#include "common/CommonCompute.hpp" #include "cpp/IDSTEncoder.hpp" static float findAbsMax(const float *weights, const int count) { @@ -42,17 +43,26 @@ void WeightQuantAndCoding(std::unique_ptr& op, const modelConfig& conf const auto opType = op->type; // config.weightQuantBits only control weight quantization for float convolution // by default, do coding for convint8 and depthwiseconvint8, if there is any - if ((config.weightQuantBits == 0) && ( - opType != MNN::OpType_ConvInt8 && opType != MNN::OpType_DepthwiseConvInt8)) { - return; - } if (opType != MNN::OpType_Convolution && opType != MNN::OpType_ConvolutionDepthwise && opType != MNN::OpType_Deconvolution && opType != MNN::OpType_DeconvolutionDepthwise && opType != MNN::OpType_ConvInt8 && opType != MNN::OpType_DepthwiseConvInt8) { return; } + auto param = op->main.AsConvolution2D(); + auto& common = param->common; + if (param->quanParameter.get() != nullptr) { + return; + } + if (config.weightQuantBits == 0) { + if (opType == MNN::OpType_ConvInt8 || opType == MNN::OpType_DepthwiseConvInt8) { + // Do nothing + } else { + CommonCompute::compressFloatWeightToSparse(op.get()); + return; + } + } int bits = 8; if ((config.weightQuantBits > 0) && ( opType != MNN::OpType_ConvInt8 && opType != MNN::OpType_DepthwiseConvInt8)) { @@ -62,12 +72,6 @@ void WeightQuantAndCoding(std::unique_ptr& op, const modelConfig& conf bits = std::max(bits, 2); bits = std::min(bits, 8); - auto param = op->main.AsConvolution2D(); - auto& common = param->common; - if (param->quanParameter.get() != nullptr) { - return; - } - int weightSize = param->weight.size(); // shared weights or sth else. if (weightSize == 0) { diff --git a/tools/converter/source/common/writeFb.cpp b/tools/converter/source/common/writeFb.cpp index 0cef7e56..5b948e47 100644 --- a/tools/converter/source/common/writeFb.cpp +++ b/tools/converter/source/common/writeFb.cpp @@ -48,7 +48,9 @@ int writeFb(std::unique_ptr& netT, const std::string& MNNModelFile, c if (config.benchmarkModel) { removeParams(netT); } - + if (config.compressionParamsFile != "") { + channelPruneConvert(netT, proto); + } if (config.saveHalfFloat) { castParamsToHalf(netT); } diff --git a/tools/converter/source/compression/MNN_compression.proto b/tools/converter/source/compression/MNN_compression.proto index 9bc8da4e..62685d0d 100644 --- a/tools/converter/source/compression/MNN_compression.proto +++ b/tools/converter/source/compression/MNN_compression.proto @@ -43,7 +43,7 @@ message LayerQuantizeParams { optional int32 clamp_min = 4 [default = -128]; optional int32 clamp_max = 5 [default = 127]; } - + message WinogradParams { required int32 version = 1 [default = 0]; // units_attr: {kyStart, kxStart, subKy, subKx, unitY, unitX} x N @@ -80,6 +80,7 @@ message PruneParams { enum PruneType { RANDOM = 0; SIMD_OC = 1; + FILTER = 2; } optional PruneType type = 1 [default = RANDOM]; optional LevelPrunerParams level_pruner_params = 2; diff --git a/tools/converter/source/compression/generated/MNN_compression.pb.cc b/tools/converter/source/compression/generated/MNN_compression.pb.cc index eafcb33f..f2cecec9 100644 --- a/tools/converter/source/compression/generated/MNN_compression.pb.cc +++ b/tools/converter/source/compression/generated/MNN_compression.pb.cc @@ -359,25 +359,26 @@ const char descriptor_table_protodef_MNN_5fcompression_2eproto[] PROTOBUF_SECTIO "\030\003 \003(\t\"o\n\022SIMDOCPrunerParams\022\033\n\023weight_t" "ensor_names\030\001 \003(\t\022\024\n\014prune_ratios\030\002 \003(\002\022" "\023\n\013layer_names\030\003 \003(\t\022\021\n\toc_blocks\030\004 \003(\005\"" - "\366\001\n\013PruneParams\022<\n\004type\030\001 \001(\0162&.MNN.Comp" + "\202\002\n\013PruneParams\022<\n\004type\030\001 \001(\0162&.MNN.Comp" "ression.PruneParams.PruneType:\006RANDOM\022\?\n" "\023level_pruner_params\030\002 \001(\0132\".MNN.Compres" "sion.LevelPrunerParams\022B\n\025simd_oc_pruner" "_params\030\003 \001(\0132#.MNN.Compression.SIMDOCPr" - "unerParams\"$\n\tPruneType\022\n\n\006RANDOM\020\000\022\013\n\007S" - "IMD_OC\020\001\"\362\001\n\017CompressionAlgo\022H\n\004type\030\001 \001" - "(\01620.MNN.Compression.CompressionAlgo.Com" - "pressionType:\010QUANTIZE\0225\n\014quant_params\030\002" - " \001(\0132\037.MNN.Compression.QuantizeParams\0222\n" - "\014prune_params\030\003 \001(\0132\034.MNN.Compression.Pr" - "uneParams\"*\n\017CompressionType\022\014\n\010QUANTIZE" - "\020\000\022\t\n\005PRUNE\020\001\"d\n\010Pipeline\022\026\n\007version\030\001 \002" - "(\t:\0050.0.0\022.\n\004algo\030\002 \003(\0132 .MNN.Compressio" - "n.CompressionAlgo\022\020\n\010mnn_uuid\030\003 \001(\t" + "unerParams\"0\n\tPruneType\022\n\n\006RANDOM\020\000\022\013\n\007S" + "IMD_OC\020\001\022\n\n\006FILTER\020\002\"\362\001\n\017CompressionAlgo" + "\022H\n\004type\030\001 \001(\01620.MNN.Compression.Compres" + "sionAlgo.CompressionType:\010QUANTIZE\0225\n\014qu" + "ant_params\030\002 \001(\0132\037.MNN.Compression.Quant" + "izeParams\0222\n\014prune_params\030\003 \001(\0132\034.MNN.Co" + "mpression.PruneParams\"*\n\017CompressionType" + "\022\014\n\010QUANTIZE\020\000\022\t\n\005PRUNE\020\001\"d\n\010Pipeline\022\026\n" + "\007version\030\001 \002(\t:\0050.0.0\022.\n\004algo\030\002 \003(\0132 .MN" + "N.Compression.CompressionAlgo\022\020\n\010mnn_uui" + "d\030\003 \001(\t" ; static ::PROTOBUF_NAMESPACE_ID::internal::once_flag descriptor_table_MNN_5fcompression_2eproto_once; const ::PROTOBUF_NAMESPACE_ID::internal::DescriptorTable descriptor_table_MNN_5fcompression_2eproto = { - false, false, 1835, descriptor_table_protodef_MNN_5fcompression_2eproto, "MNN_compression.proto", + false, false, 1847, descriptor_table_protodef_MNN_5fcompression_2eproto, "MNN_compression.proto", &descriptor_table_MNN_5fcompression_2eproto_once, nullptr, 0, 10, schemas, file_default_instances, TableStruct_MNN_5fcompression_2eproto::offsets, file_level_metadata_MNN_5fcompression_2eproto, file_level_enum_descriptors_MNN_5fcompression_2eproto, file_level_service_descriptors_MNN_5fcompression_2eproto, @@ -444,6 +445,7 @@ bool PruneParams_PruneType_IsValid(int value) { switch (value) { case 0: case 1: + case 2: return true; default: return false; @@ -453,6 +455,7 @@ bool PruneParams_PruneType_IsValid(int value) { #if (__cplusplus < 201703) && (!defined(_MSC_VER) || (_MSC_VER >= 1900 && _MSC_VER < 1912)) constexpr PruneParams_PruneType PruneParams::RANDOM; constexpr PruneParams_PruneType PruneParams::SIMD_OC; +constexpr PruneParams_PruneType PruneParams::FILTER; constexpr PruneParams_PruneType PruneParams::PruneType_MIN; constexpr PruneParams_PruneType PruneParams::PruneType_MAX; constexpr int PruneParams::PruneType_ARRAYSIZE; diff --git a/tools/converter/source/compression/generated/MNN_compression.pb.h b/tools/converter/source/compression/generated/MNN_compression.pb.h index e9bbc8d4..65795391 100644 --- a/tools/converter/source/compression/generated/MNN_compression.pb.h +++ b/tools/converter/source/compression/generated/MNN_compression.pb.h @@ -153,11 +153,12 @@ inline bool LayerQuantizeParams_QuantMethod_Parse( } enum PruneParams_PruneType : int { PruneParams_PruneType_RANDOM = 0, - PruneParams_PruneType_SIMD_OC = 1 + PruneParams_PruneType_SIMD_OC = 1, + PruneParams_PruneType_FILTER = 2 }; bool PruneParams_PruneType_IsValid(int value); constexpr PruneParams_PruneType PruneParams_PruneType_PruneType_MIN = PruneParams_PruneType_RANDOM; -constexpr PruneParams_PruneType PruneParams_PruneType_PruneType_MAX = PruneParams_PruneType_SIMD_OC; +constexpr PruneParams_PruneType PruneParams_PruneType_PruneType_MAX = PruneParams_PruneType_FILTER; constexpr int PruneParams_PruneType_PruneType_ARRAYSIZE = PruneParams_PruneType_PruneType_MAX + 1; const ::PROTOBUF_NAMESPACE_ID::EnumDescriptor* PruneParams_PruneType_descriptor(); @@ -1935,6 +1936,8 @@ class PruneParams final : PruneParams_PruneType_RANDOM; static constexpr PruneType SIMD_OC = PruneParams_PruneType_SIMD_OC; + static constexpr PruneType FILTER = + PruneParams_PruneType_FILTER; static inline bool PruneType_IsValid(int value) { return PruneParams_PruneType_IsValid(value); } diff --git a/tools/cpp/IDSTEncoder.hpp b/tools/cpp/IDSTEncoder.hpp index 9b441e08..908469fe 100644 --- a/tools/cpp/IDSTEncoder.hpp +++ b/tools/cpp/IDSTEncoder.hpp @@ -18,16 +18,30 @@ using namespace MNN; namespace IDSTEncoder { -static void WriteBlobDim(std::ostream &out, std::vector dims) +static bool WriteBlobDim(std::ostream &out, std::vector dims) { char tmp[4]; + bool useInt32 = false; ((unsigned char *)tmp)[0] = (unsigned char)dims.size(); out.write(tmp, 1); - for (int i = 0; i < dims.size(); i++) - { - unsigned short tmpShort = (unsigned short)dims[i]; - out.write((const char*)(&tmpShort), 2); + for (int i = 0; i < dims.size(); i++) { + if (dims[i] > ((1<<16)-1)) { + useInt32 = true; + break; + } } + if (useInt32) { + for (int i = 0; i < dims.size(); i++) { + unsigned int tmpShort = (unsigned int)dims[i]; + out.write((const char*)(&tmpShort), 4); + } + } else { + for (int i = 0; i < dims.size(); i++) { + unsigned short tmpShort = (unsigned short)dims[i]; + out.write((const char*)(&tmpShort), 2); + } + } + return useInt32; } static void FillBuffer(char *buf, unsigned int buf_len, const char *arr, unsigned int arr_len, unsigned char iNeedBits) @@ -174,7 +188,7 @@ static unsigned int GetBestMaxStep(const float* weightData, int weightSize, unsi return best_nnz; } -static void WriteCQBlobs(std::ostream &out, const float* weightData, const float* alphaData, int area, int channel, bool asymmetricQuantFlag) +static void WriteCQBlobs(std::ostream &out, const float* weightData, const float* alphaData, int area, int channel, bool asymmetricQuantFlag, bool& shapeUseInt32) { //push values into buffer //Find int values in all blobs and check; @@ -239,7 +253,7 @@ static void WriteCQBlobs(std::ostream &out, const float* weightData, const float { char tmp[100]; //1. weights blob shape(unsigned int32) - WriteBlobDim(out, {channel, area}); + shapeUseInt32 = WriteBlobDim(out, {channel, area}); // 2. Avalable values Count(unsigned char) tmp[0] = (unsigned char)iCount; out.write(tmp, 1); @@ -256,7 +270,7 @@ static void WriteCQBlobs(std::ostream &out, const float* weightData, const float delete[] buf; } -static void WriteSparseQuanBlobs(std::ostream &out, const float* weightData, const float* alphaData, int area, int channel, bool asymmetricQuantFlag) +static void WriteSparseQuanBlobs(std::ostream &out, const float* weightData, const float* alphaData, int area, int channel, bool asymmetricQuantFlag, bool& shapeUseInt32) { std::set setWeight; GetWeightSet(setWeight, weightData, alphaData, area, channel, asymmetricQuantFlag); @@ -358,7 +372,7 @@ static void WriteSparseQuanBlobs(std::ostream &out, const float* weightData, con { //write char tmp[100]; // 1.weights blob shape(unsigned int32) - WriteBlobDim(out, {channel, area}); + shapeUseInt32 = WriteBlobDim(out, {channel, area}); // 2. nnz out.write((const char*) &nnz, 4); // 3. max_step use # bits () (unsigned char) @@ -384,12 +398,14 @@ static void WriteSparseQuanBlobs(std::ostream &out, const float* weightData, con static std::unique_ptr encode(const std::vector& weight, const std::vector& scale, int kernelSize, int kernelNum, bool asymmetricQuantFlag, const int8_t* quantWeightPtr, const int clampMin) { std::ostringstream outputStringStreamCQ, outputStringStreamSQ; - WriteCQBlobs(outputStringStreamCQ, weight.data(), scale.data(), kernelSize, kernelNum, asymmetricQuantFlag); - WriteSparseQuanBlobs(outputStringStreamSQ, weight.data(), scale.data(), kernelSize, kernelNum, asymmetricQuantFlag); + bool shapeUseInt32 = false; + WriteCQBlobs(outputStringStreamCQ, weight.data(), scale.data(), kernelSize, kernelNum, asymmetricQuantFlag, shapeUseInt32); + WriteSparseQuanBlobs(outputStringStreamSQ, weight.data(), scale.data(), kernelSize, kernelNum, asymmetricQuantFlag, shapeUseInt32); std::unique_ptr idst(new IDSTQuanT); auto cqStr = outputStringStreamCQ.str(); auto sqStr = outputStringStreamSQ.str(); int int8Size = kernelNum * kernelSize; + idst->shapeInt32 = shapeUseInt32; if (quantWeightPtr && (int8Size <= cqStr.size() && int8Size <= sqStr.size())) { idst->type = 4; idst->aMax = kernelNum; diff --git a/tools/cpp/revertMNNModel.cpp b/tools/cpp/revertMNNModel.cpp index e57f9023..2157a638 100644 --- a/tools/cpp/revertMNNModel.cpp +++ b/tools/cpp/revertMNNModel.cpp @@ -59,6 +59,7 @@ void Revert::packMNNNet() { void Revert::initialize(float spasity, int sparseBlockOC, bool rewrite) { if (mMNNNet->bizCode == "benchmark" || rewrite) { randStart(); + bool useSparse = spasity > 0.5f; for (auto& op : mMNNNet->oplists) { const auto opType = op->type; switch (opType) { @@ -71,51 +72,53 @@ void Revert::initialize(float spasity, int sparseBlockOC, bool rewrite) { const int oc = convCommon->outputCount / convCommon->group; param->weight.resize(oc * weightReduceStride); ::memset(param->weight.data(), 0, param->weight.size() * sizeof(float)); - size_t weightNNZElement, weightBlockNumber = 0; - MNN::CommonCompute::fillRandValueAsSparsity(weightNNZElement, weightBlockNumber, param->weight.data(), oc, weightReduceStride, spasity, sparseBlockOC); - - MNN::AttributeT* arg1(new MNN::AttributeT); - arg1->key = "sparseBlockOC"; - arg1->i = sparseBlockOC; - - MNN::AttributeT* arg2(new MNN::AttributeT); - arg2->key = "sparseBlockKernel"; - arg2->i = 1; - - MNN::AttributeT* arg3(new MNN::AttributeT); - arg3->key = "NNZElement"; - arg3->i = weightNNZElement; - - MNN::AttributeT* arg4(new MNN::AttributeT); - arg4->key = "blockNumber"; - arg4->i = weightBlockNumber; - - flatbuffers::FlatBufferBuilder builder; - std::vector> argsVector; - auto sparseArg1 = MNN::CreateAttribute(builder, arg1); - auto sparseArg2 = MNN::CreateAttribute(builder, arg2); - auto sparseArg3 = MNN::CreateAttribute(builder, arg3); - auto sparseArg4 = MNN::CreateAttribute(builder, arg4); - - argsVector.emplace_back(sparseArg1); - argsVector.emplace_back(sparseArg2); - argsVector.emplace_back(sparseArg3); - argsVector.emplace_back(sparseArg4); - - auto sparseArgs = builder.CreateVectorOfSortedTables(&argsVector); - MNN::SparseAlgo prune_algo_type; - if (sparseBlockOC == 4) { - prune_algo_type = MNN::SparseAlgo_SIMD_OC; - } else { - prune_algo_type = MNN::SparseAlgo_RANDOM; - } - auto sparseCom = MNN::CreateSparseCommon(builder, prune_algo_type, sparseArgs); - builder.Finish(sparseCom); - auto sparseComPtr = flatbuffers::GetRoot(builder.GetBufferPointer())->UnPack(); - param->sparseParameter.reset(sparseComPtr); - param->bias.resize(convCommon->outputCount); ::memset(param->bias.data(), 0, param->bias.size() * sizeof(float)); + if (useSparse) { + size_t weightNNZElement, weightBlockNumber = 0; + MNN::CommonCompute::fillRandValueAsSparsity(weightNNZElement, weightBlockNumber, param->weight.data(), oc, weightReduceStride, spasity, sparseBlockOC); + + MNN::AttributeT* arg1(new MNN::AttributeT); + arg1->key = "sparseBlockOC"; + arg1->i = sparseBlockOC; + + MNN::AttributeT* arg2(new MNN::AttributeT); + arg2->key = "sparseBlockKernel"; + arg2->i = 1; + + MNN::AttributeT* arg3(new MNN::AttributeT); + arg3->key = "NNZElement"; + arg3->i = weightNNZElement; + + MNN::AttributeT* arg4(new MNN::AttributeT); + arg4->key = "blockNumber"; + arg4->i = weightBlockNumber; + + flatbuffers::FlatBufferBuilder builder; + std::vector> argsVector; + auto sparseArg1 = MNN::CreateAttribute(builder, arg1); + auto sparseArg2 = MNN::CreateAttribute(builder, arg2); + auto sparseArg3 = MNN::CreateAttribute(builder, arg3); + auto sparseArg4 = MNN::CreateAttribute(builder, arg4); + + argsVector.emplace_back(sparseArg1); + argsVector.emplace_back(sparseArg2); + argsVector.emplace_back(sparseArg3); + argsVector.emplace_back(sparseArg4); + + auto sparseArgs = builder.CreateVectorOfSortedTables(&argsVector); + MNN::SparseAlgo prune_algo_type; + if (sparseBlockOC == 4) { + prune_algo_type = MNN::SparseAlgo_SIMD_OC; + } else { + prune_algo_type = MNN::SparseAlgo_RANDOM; + } + auto sparseCom = MNN::CreateSparseCommon(builder, prune_algo_type, sparseArgs); + builder.Finish(sparseCom); + auto sparseComPtr = flatbuffers::GetRoot(builder.GetBufferPointer())->UnPack(); + param->sparseParameter.reset(sparseComPtr); + MNN::CommonCompute::compressFloatWeightToSparse(op.get()); + } break; } case MNN::OpType_Scale: { diff --git a/tools/script/testMNNFromOnnx.py b/tools/script/testMNNFromOnnx.py index 1fb2abbc..c34eed24 100644 --- a/tools/script/testMNNFromOnnx.py +++ b/tools/script/testMNNFromOnnx.py @@ -270,9 +270,12 @@ if __name__ == '__main__': t = TestModel(modelName) if len(sys.argv) > 2: if sys.argv[2] == 'DEBUG': - debugMode = len(sys.argv) > 2 - print('Debug Mode: ', debugMode) - t.Debug() + message = t.Test() + print(message) + if message.find("TEST_SUCCESS") < 0: + debugMode = len(sys.argv) > 2 + print('Debug Mode: ', debugMode) + t.Debug() else: specifyOpName = sys.argv[2] t.TestName(specifyOpName) diff --git a/tools/train/README_CN.md b/tools/train/README_CN.md index 955e9646..96979bea 100644 --- a/tools/train/README_CN.md +++ b/tools/train/README_CN.md @@ -2,7 +2,7 @@ ## 编译 ### MNN 编译与安装 -- MNN 编译时打开 MNN_SUPPORT_TRAIN 开关:cmake .. -DMNN_SUPPORT_TRAIN=true +- MNN 编译时打开 MNN_BUILD_TRAIN 开关:cmake .. -DMNN_BUILD_TRAIN=true ### 产物 - transformer.out @@ -11,6 +11,7 @@ - train.out - backendTest.out - backwardTest.out +- runTrainDemo.out ## 使用 diff --git a/tools/train/source/exec/transformerExecution.cpp b/tools/train/source/exec/transformerExecution.cpp index 09ec75d2..8e8f8115 100644 --- a/tools/train/source/exec/transformerExecution.cpp +++ b/tools/train/source/exec/transformerExecution.cpp @@ -29,6 +29,35 @@ using namespace MNN::Express; using namespace MNN::Train; using namespace std; + +VARP getLocalLearningRate(std::string pName, std::vector> weightNameGroups, std::vector lrNames, + std::map &lrMap, std::map &extraInputs) { + bool hasLocalOptConf = false; + std::string localLrName; + for (int ii = 0; ii < weightNameGroups.size(); ii++) { + if (std::find(weightNameGroups[ii].begin(), weightNameGroups[ii].end(), pName) != weightNameGroups[ii].end()) { + hasLocalOptConf = true; + localLrName = lrNames[ii]; + break; + } + } + if (!hasLocalOptConf) { + localLrName = "LearningRate"; + } + VARP localLearningRate; + if (lrMap.find(localLrName) != lrMap.end()) { + localLearningRate = lrMap[localLrName]; + } else { + auto newLr = _Input({}, NCHW); + newLr->setName(localLrName); + lrMap[localLrName] = newLr; + localLearningRate = newLr; + } + extraInputs[localLrName] = "float"; + return localLearningRate; +} + + int main(int argc, const char* argv[]) { if (argc < 4) { MNN_PRINT("Usage: ./transformer.out temp.bin dst.bin config.json\n"); @@ -54,34 +83,59 @@ int main(int argc, const char* argv[]) { std::vector onlyUpdateOps; std::vector stopBackPropOps; std::string optimizerType = "SGD"; - if (configObject.HasMember("Optimizor")) { - auto optimizor = configObject["Optimizor"].GetObject(); - if (optimizor.HasMember("OnlyUpdateOps")) { - auto limitArray = optimizor["OnlyUpdateOps"].GetArray(); + std::vector fixAsConstOps; + std::vector> weightNameGroups; + std::vector lrNames; + if (configObject.HasMember("Optimizer")) { + auto optimizer = configObject["Optimizer"].GetObject(); + if (optimizer.HasMember("OnlyUpdateOps")) { + auto limitArray = optimizer["OnlyUpdateOps"].GetArray(); for (auto vIter = limitArray.begin(); vIter != limitArray.end(); vIter++) { onlyUpdateOps.emplace_back(vIter->GetString()); MNN_PRINT("will only update: %s \n", vIter->GetString()); } } - if (optimizor.HasMember("NoUpdateOps")) { - auto limitArray = optimizor["NoUpdateOps"].GetArray(); + if (optimizer.HasMember("NoUpdateOps")) { + auto limitArray = optimizer["NoUpdateOps"].GetArray(); for (auto vIter = limitArray.begin(); vIter != limitArray.end(); vIter++) { noUpdateOps.emplace_back(vIter->GetString()); if (onlyUpdateOps.empty()) MNN_PRINT("will not update: %s \n", vIter->GetString()); } } - if (optimizor.HasMember("StopBackPropOps")) { - auto limitArray = optimizor["StopBackPropOps"].GetArray(); + if (optimizer.HasMember("StopBackPropOps")) { + auto limitArray = optimizer["StopBackPropOps"].GetArray(); for (auto vIter = limitArray.begin(); vIter != limitArray.end(); vIter++) { stopBackPropOps.emplace_back(vIter->GetString()); MNN_PRINT("will stop back prop from (also not update this op): %s \n", vIter->GetString()); } } - if (optimizor.HasMember("type")) { - optimizerType = std::string(optimizor["type"].GetString()); + if (optimizer.HasMember("type")) { + optimizerType = std::string(optimizer["type"].GetString()); MNN_PRINT("optimizer type: %s\n", optimizerType.c_str()); } + if (optimizer.HasMember("FixAsConstOps")) { + auto limitArray = optimizer["FixAsConstOps"].GetArray(); + for (auto vIter = limitArray.begin(); vIter != limitArray.end(); vIter++) { + fixAsConstOps.emplace_back(vIter->GetString()); + MNN_PRINT("this op will be fixed as Const, and maybe turn to Trainable later: %s \n", vIter->GetString()); + } + } + if (optimizer.HasMember("ParameterOptConfig")) { + auto pConf = optimizer["ParameterOptConfig"].GetArray(); + for (auto vIter = pConf.begin(); vIter != pConf.end(); vIter++) { + auto conf = vIter->GetObject(); + if (conf.HasMember("WeightNames") && conf.HasMember("LrName")) { + auto wn = conf["WeightNames"].GetArray(); + std::vector wNames; + for (auto wIter = wn.begin(); wIter != wn.end(); wIter++) { + wNames.push_back(wIter->GetString()); + } + weightNameGroups.push_back(wNames); + lrNames.push_back(conf["LrName"].GetString()); + } + } + } } auto bnMomentum = new MNN::AttributeT; bnMomentum->f = 0.99; @@ -100,6 +154,17 @@ int main(int argc, const char* argv[]) { inputVars = inputsOutputs.first; outputVars = inputsOutputs.second; } + for (auto& varIter : inputVars) { + auto var = varIter.second; + auto varInfo = var->getInfo(); + auto vDims = varInfo->dim; + + if (!fixAsConstOps.empty()) { + if (std::find(fixAsConstOps.begin(), fixAsConstOps.end(), var->name()) != fixAsConstOps.end()) { + var.fix(VARP::CONSTANT); + } + } + } Transformer::TrainConfig trainConfig; trainConfig.noUpdateOps = std::move(noUpdateOps); trainConfig.onlyUpdateOps = std::move(onlyUpdateOps); @@ -185,15 +250,19 @@ int main(int argc, const char* argv[]) { } } } + auto lossInfo = loss->getInfo(); MNN_ASSERT(nullptr != loss); auto gradMap = OpGrad::grad(loss, parameters, stopBackPropOps); // Make Update std::map varUpdateMap; - auto learningRate = _Input(); + auto learningRate = _Input({}, NCHW); learningRate->setName("LearningRate"); - auto weightDecay = _Input(); + auto weightDecay = _Input({}, NCHW); weightDecay->setName("WeightDecay"); + std::map lrMap; + lrMap["LearningRate"] = learningRate; + auto step = _Scalar(1.0f); step->setName("optimize_step"); step.fix(VARP::TRAINABLE); @@ -209,12 +278,13 @@ int main(int argc, const char* argv[]) { } if (optimizerType == "SGD") { - auto momentum = _Input(); + auto momentum = _Input({}, NCHW); momentum->setName("Momentum"); extraInputs["Momentum"] = "float"; for (auto iter : gradMap) { auto p = iter.first; + MNN_PRINT("optimize variable: %s\n", p->name().c_str()); p.fix(VARP::TRAINABLE); auto grad = iter.second; grad->setName(p->name()+"_grad"); @@ -251,7 +321,9 @@ int main(int argc, const char* argv[]) { auto newHistory = gradWithDecay + momentum * history; newHistory->setName("update_" + history->name()); - auto finalGrad = learningRate * history; + VARP localLearningRate = getLocalLearningRate(p->name(), weightNameGroups, lrNames, lrMap, extraInputs); + MNN_PRINT("variable: %s, lr name: %s\n", p->name().c_str(), localLearningRate->name().c_str()); + VARP finalGrad = localLearningRate * history; finalGrad->setName(p->name() + "_final_grad"); auto updateValue = _Subtract(p, finalGrad); @@ -260,11 +332,11 @@ int main(int argc, const char* argv[]) { varUpdateMap[history] = newHistory; } } else if (optimizerType == "ADAM") { - auto beta1 = _Input(); + auto beta1 = _Input({}, NCHW); beta1->setName("Beta1"); - auto beta2 = _Input(); + auto beta2 = _Input({}, NCHW); beta2->setName("Beta2"); - auto eps = _Input(); + auto eps = _Input({}, NCHW); eps->setName("Eps"); extraInputs["Beta1"] = "float"; @@ -276,6 +348,7 @@ int main(int argc, const char* argv[]) { for (auto iter : gradMap) { auto p = iter.first; + MNN_PRINT("optimize variable: %s\n", p->name().c_str()); p.fix(VARP::TRAINABLE); auto grad = iter.second; grad->setName(p->name()+"_grad"); @@ -317,7 +390,9 @@ int main(int argc, const char* argv[]) { auto newHistory2 = beta2 * history2 + (_Scalar(1.0f) - beta2) * _Square(gradWithDecay); newHistory2->setName("update_" + history2->name()); - auto finalGrad = learningRate * correction * (history1 / (_Sqrt(history2 + _Scalar(1e-8)) + eps)); + VARP localLearningRate = getLocalLearningRate(p->name(), weightNameGroups, lrNames, lrMap, extraInputs); + MNN_PRINT("variable: %s, lr name: %s\n", p->name().c_str(), localLearningRate->name().c_str()); + auto finalGrad = localLearningRate * correction * (history1 / (_Sqrt(history2 + _Scalar(1e-8)) + eps)); finalGrad->setName(p->name() + "_final_grad"); auto updateValue = _Subtract(p, finalGrad); diff --git a/tools/train/source/grad/BinaryGrad.cpp b/tools/train/source/grad/BinaryGrad.cpp index 44517506..d4497a06 100644 --- a/tools/train/source/grad/BinaryGrad.cpp +++ b/tools/train/source/grad/BinaryGrad.cpp @@ -79,6 +79,11 @@ public: for (int i = 0; i < expr->outputSize(); ++i) { output[i] = Variable::create(expr, i); } + int activateType = op->main_as_BinaryOp()->activationType(); + if (activateType == 1) { // relu + auto mask = _Cast(_Greater(output[0], _Scalar(0.0f))); + outputDiff = mask * backwardOutput[0]; + } switch (op->main_as_BinaryOp()->opType()) { case BinaryOpOperation_ADD: { res[0] = outputDiff; diff --git a/tools/train/transformConfig.json b/tools/train/transformConfig.json index 7f9751dc..10fd9b98 100644 --- a/tools/train/transformConfig.json +++ b/tools/train/transformConfig.json @@ -1,20 +1,28 @@ { "Train": true, "Loss": { - "op": "output" + "op": "loss" }, - "Optimizor": { + "Optimizer": { "OnlyUpdateOps":[], "NoUpdateOps":[], "StopBackPropOps":[], - "type": "SGD" + "type": "SGD", + "ParameterOptConfig":[ + { + "WeightNames":["example_Weight1", "example_Weight2"], + "LrName":"LearningRate2" + }, + { + "WeightNames":["example_Weight3"], + "LrName":"LearningRate3" + } + ], + "FixAsConstOps":[] }, "BatchNorm": { "momentum":0.99 }, - "Debug": { - "L2Norm": [] - }, "Shape": { "input": [1, 3, 224, 224] } diff --git a/tools/train/transformConfig2.json b/tools/train/transformConfig2.json index 3d493de6..d2f105ea 100644 --- a/tools/train/transformConfig2.json +++ b/tools/train/transformConfig2.json @@ -4,8 +4,5 @@ "OnlyUpdateOps":[], "NoUpdateOps":[], "type": "SGD" - }, - "Debug": { - "L2Norm": [] } }