// MNN/benchmark/benchmark.cpp

//
// benchmark.cpp
// MNN
//
// Created by MNN on 2019/01/31.
// Copyright © 2018, Alibaba Group Holding Limited
//
#include <errno.h>
#include <float.h>
#include <math.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <cstring>
#include <fstream>
#include <iostream>
#include <vector>
#if defined(_MSC_VER)
#include <Windows.h>
#undef min
#undef max
#else
#include <sys/time.h>
#include <sys/stat.h>
#include <sys/types.h>
#include <dirent.h>
#endif
#include "core/Backend.hpp"
#include <MNN/Interpreter.hpp>
#include <MNN/MNNDefine.h>
#include <MNN/Tensor.hpp>
#include <MNN/AutoTime.hpp>
#include "revertMNNModel.hpp"
/**
TODOs:
1. dynamically get CPU related info.
2. iOS support
*/
struct Model {
    std::string name;
    std::string model_file;
};
#if !defined(_MSC_VER)
inline bool file_exist(const char* file) {
    struct stat buffer;
    return stat(file, &buffer) == 0;
}
#endif
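// Collect candidate model files from `dir` (non-recursive): the Windows branch
// matches *.mnn, while the POSIX branch takes every non-hidden entry that exists.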
std::vector<Model> findModelFiles(const char* dir) {
    std::vector<Model> models;
#if defined(_MSC_VER)
    WIN32_FIND_DATA ffd;
    HANDLE hFind = INVALID_HANDLE_VALUE;
    std::string mnn_model_pattern = std::string(dir) + "\\*.mnn";
    hFind = FindFirstFile(mnn_model_pattern.c_str(), &ffd);
    if (INVALID_HANDLE_VALUE == hFind) {
        // FindFirstFile reports failures via GetLastError(), not errno.
        std::cout << "open " << dir << " failed: " << GetLastError() << std::endl;
        return models;
    }
    do {
        Model m;
        m.name       = ffd.cFileName;
        m.model_file = std::string(dir) + "\\" + m.name;
        if (INVALID_FILE_ATTRIBUTES != GetFileAttributes(m.model_file.c_str()) && GetLastError() != ERROR_FILE_NOT_FOUND) {
            models.push_back(std::move(m));
        }
    } while (FindNextFile(hFind, &ffd) != 0);
    FindClose(hFind);
#else
    DIR* root;
    if ((root = opendir(dir)) == NULL) {
        std::cout << "open " << dir << " failed: " << strerror(errno) << std::endl;
        return models;
    }
    struct dirent* ent;
    while ((ent = readdir(root)) != NULL) {
        Model m;
        if (ent->d_name[0] != '.') {
            m.name       = ent->d_name;
            m.model_file = std::string(dir) + "/" + m.name;
            if (file_exist(m.model_file.c_str())) {
                models.push_back(std::move(m));
            }
        }
    }
    closedir(root);
#endif
    return models;
}
void setInputData(MNN::Tensor* tensor) {
    float* data = tensor->host<float>();
    Revert::fillRandValue(data, tensor->elementSize());
}
static inline uint64_t getTimeInUs() {
    uint64_t time;
#if defined(_MSC_VER)
    LARGE_INTEGER now, freq;
    QueryPerformanceCounter(&now);
    QueryPerformanceFrequency(&freq);
    uint64_t sec  = now.QuadPart / freq.QuadPart;
    uint64_t usec = (now.QuadPart % freq.QuadPart) * 1000000 / freq.QuadPart;
    time = sec * 1000000 + usec;
#else
    struct timeval tv;
    gettimeofday(&tv, nullptr);
    time = static_cast<uint64_t>(tv.tv_sec) * 1000000 + tv.tv_usec;
#endif
    return time;
}
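// Benchmark one model: rebuild its weights via Revert (optionally sparsified or
// quantized), create a session on the requested backend, run `warmup` untimed
// inferences, then time `loop` full runs and return each run's cost in ms.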
std::vector<float> doBench(Model& model, int loop, int warmup = 10, int forward = MNN_FORWARD_CPU, bool only_inference = true,
                           int numberThread = 4, int precision = 2, float sparsity = 0.0f, int sparseBlockOC = 1,
                           bool testQuantModel = false, bool enableKleidiAI = false) {
    auto revertor = std::unique_ptr<Revert>(new Revert(model.model_file.c_str()));
    if (testQuantModel) {
        revertor->initialize(0, sparseBlockOC, false, true);
    } else {
        revertor->initialize(sparsity, sparseBlockOC);
    }
    auto modelBuffer      = revertor->getBuffer();
    const auto bufferSize = revertor->getBufferSize();
    auto net = std::shared_ptr<MNN::Interpreter>(MNN::Interpreter::createFromBuffer(modelBuffer, bufferSize), MNN::Interpreter::destroy);
    revertor.reset();
    net->setSessionMode(MNN::Interpreter::Session_Release);
    net->setSessionHint(MNN::Interpreter::HintMode::CPU_ENABLE_KLEIDIAI, enableKleidiAI);
    MNN::ScheduleConfig config;
    config.numThread = numberThread;
    config.type      = static_cast<MNNForwardType>(forward);
    MNN::BackendConfig backendConfig;
    backendConfig.precision = (MNN::BackendConfig::PrecisionMode)precision;
    backendConfig.power     = MNN::BackendConfig::Power_High;
    config.backendConfig    = &backendConfig;
    std::vector<float> costs;
    MNN::Session* session = net->createSession(config);
    MNN::Tensor* input    = net->getSessionInput(session, NULL);
    // If the model does not carry input dimensions, uncomment the code below to set them.
    // std::vector<int> dims{1, 3, 224, 224};
    // net->resizeTensor(input, dims);
    // net->resizeSession(session);
    net->releaseModel();
    const MNN::Backend* inBackend = net->getBackend(session, input);
    std::shared_ptr<MNN::Tensor> givenTensor(MNN::Tensor::createHostTensorFromDevice(input, false));
    auto outputTensor = net->getSessionOutput(session, NULL);
    std::shared_ptr<MNN::Tensor> expectTensor(MNN::Tensor::createHostTensorFromDevice(outputTensor, false));
    // Warming up...
    for (int i = 0; i < warmup; ++i) {
        void* host = input->map(MNN::Tensor::MAP_TENSOR_WRITE, input->getDimensionType());
        input->unmap(MNN::Tensor::MAP_TENSOR_WRITE, input->getDimensionType(), host);
        net->runSession(session);
        host = outputTensor->map(MNN::Tensor::MAP_TENSOR_READ, outputTensor->getDimensionType());
        outputTensor->unmap(MNN::Tensor::MAP_TENSOR_READ, outputTensor->getDimensionType(), host);
    }
    for (int round = 0; round < loop; round++) {
        MNN::Timer _t;
        void* host = input->map(MNN::Tensor::MAP_TENSOR_WRITE, input->getDimensionType());
        input->unmap(MNN::Tensor::MAP_TENSOR_WRITE, input->getDimensionType(), host);
        net->runSession(session);
        host = outputTensor->map(MNN::Tensor::MAP_TENSOR_READ, outputTensor->getDimensionType());
        outputTensor->unmap(MNN::Tensor::MAP_TENSOR_READ, outputTensor->getDimensionType(), host);
        auto time = (float)_t.durationInUs() / 1000.0f;
        costs.push_back(time);
    }
    return costs;
}
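// Print max/min/avg latency for one model; quant == 1 marks a quantized run.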
void displayStats(const std::string& name, const std::vector<float>& costs, int quant = 0) {
    float max = 0, min = FLT_MAX, sum = 0, avg;
    for (auto v : costs) {
        max = fmax(max, v);
        min = fmin(min, v);
        sum += v;
        // printf("[ - ] cost %f ms\n", v);
    }
    avg = costs.size() > 0 ? sum / costs.size() : 0;
    std::string model = name;
    if (quant == 1) {
        model = "quant-" + name;
    }
    // avg == 0 means no samples were collected, so min is still FLT_MAX; print 0 instead.
    printf("[ - ] %-24s max = %8.3f ms min = %8.3f ms avg = %8.3f ms\n", model.c_str(), max, avg == 0 ? 0 : min, avg);
}
static inline std::string forwardType(MNNForwardType type) {
    switch (type) {
        case MNN_FORWARD_CPU:
            return "CPU";
        case MNN_FORWARD_VULKAN:
            return "Vulkan";
        case MNN_FORWARD_OPENCL:
            return "OpenCL";
        case MNN_FORWARD_METAL:
            return "Metal";
        default:
            break;
    }
    return "N/A";
}
#ifdef __ANDROID__
#include <errno.h>
#include <unistd.h>
#include <sys/prctl.h>
#include <sys/syscall.h>
#define BUFFER_SIZE 1024
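// Count "processor" entries in /proc/cpuinfo; fall back to 1 on any failure.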
static uint32_t getNumberOfCPU() {
    FILE* fp = fopen("/proc/cpuinfo", "rb");
    if (!fp) {
        return 1;
    }
    uint32_t number = 0;
    char buffer[BUFFER_SIZE];
    while (!feof(fp)) {
        char* str = fgets(buffer, BUFFER_SIZE, fp);
        if (!str) {
            break;
        }
        if (memcmp(buffer, "processor", 9) == 0) {
            number++;
        }
    }
    fclose(fp);
    if (number < 1) {
        number = 1;
    }
    return number;
}
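// Best-effort maximum frequency (kHz) of one core, probing the cpufreq sysfs
// layouts used by different kernels; returns -1 if none of them is readable.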
static int getCPUMaxFreqKHz(int cpuID) {
    char path[256];
    sprintf(path, "/sys/devices/system/cpu/cpufreq/stats/cpu%d/time_in_state", cpuID);
    FILE* fp = fopen(path, "rb");
    if (!fp) {
        sprintf(path, "/sys/devices/system/cpu/cpu%d/cpufreq/stats/time_in_state", cpuID);
        fp = fopen(path, "rb");
        if (!fp) {
            sprintf(path, "/sys/devices/system/cpu/cpu%d/cpufreq/cpuinfo_max_freq", cpuID);
            fp = fopen(path, "rb");
            if (!fp) {
                return -1;
            }
            int maxfrequency = -1;
            fscanf(fp, "%d", &maxfrequency);
            fclose(fp);
            return maxfrequency;
        }
    }
    int maxfrequency = 0;
    while (!feof(fp)) {
        int frequency = 0;
        int history = fscanf(fp, "%d %*d", &frequency);
        if (history != 1) {
            break;
        }
        if (frequency > maxfrequency) {
            maxfrequency = frequency;
        }
    }
    fclose(fp);
    return maxfrequency;
}
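// Sort core IDs by descending max frequency and report the index where the
// little (slower) cluster starts via *littleClusterOffset.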
static int sortCPUIDByMaxFrequency(std::vector<int>& cpuIDs, int* littleClusterOffset) {
    const int cpuNumbers = cpuIDs.size();
    *littleClusterOffset = 0;
    if (cpuNumbers == 0) {
        return 0;
    }
    std::vector<int> cpusFrequency;
    cpusFrequency.resize(cpuNumbers);
    for (int i = 0; i < cpuNumbers; ++i) {
        int frequency    = getCPUMaxFreqKHz(i);
        cpuIDs[i]        = i;
        cpusFrequency[i] = frequency;
        // MNN_PRINT("cpu fre: %d, %d\n", i, frequency);
    }
    for (int i = 0; i < cpuNumbers; ++i) {
        for (int j = i + 1; j < cpuNumbers; ++j) {
            if (cpusFrequency[i] < cpusFrequency[j]) {
                // id
                int temp  = cpuIDs[i];
                cpuIDs[i] = cpuIDs[j];
                cpuIDs[j] = temp;
                // frequency
                temp             = cpusFrequency[i];
                cpusFrequency[i] = cpusFrequency[j];
                cpusFrequency[j] = temp;
            }
        }
    }
    int midMaxFrequency = (cpusFrequency.front() + cpusFrequency.back()) / 2;
    if (midMaxFrequency == cpusFrequency.back()) {
        return 0;
    }
    for (int i = 0; i < cpuNumbers; ++i) {
        if (cpusFrequency[i] < midMaxFrequency) {
            *littleClusterOffset = i;
            break;
        }
    }
    return 0;
}
//#define CPU_SETSIZE 1024
#define __NCPUBITS (8 * sizeof (unsigned long))
#endif
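// Pin the current thread to the big (highest-frequency) cores on Android.
// No-op on other platforms; currently not called from main().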
void set_cpu_affinity() {
#ifdef __ANDROID__
    int cpu_core_num = sysconf(_SC_NPROCESSORS_CONF);
    // LOG_MCNN_CL_INF("cpu core num = %d\n", cpu_core_num);
    int cpu_id = 0;
    cpu_set_t mask;
    CPU_ZERO(&mask);
    auto numberOfCPUs = getNumberOfCPU();
    static std::vector<int> sortedCPUIDs;
    static int littleClusterOffset = 0;
    if (sortedCPUIDs.empty()) {
        sortedCPUIDs.resize(numberOfCPUs);
        for (int i = 0; i < numberOfCPUs; ++i) {
            sortedCPUIDs[i] = i;
        }
        sortCPUIDByMaxFrequency(sortedCPUIDs, &littleClusterOffset);
    }
    printf("max core:");
    for (cpu_id = 0; cpu_id < littleClusterOffset; cpu_id++) {
        printf("%d ", sortedCPUIDs[cpu_id]);
        CPU_SET(sortedCPUIDs[cpu_id], &mask);
    }
    printf("\n");
    int sys_call_res = syscall(__NR_sched_setaffinity, gettid(), sizeof(mask), &mask);
    // LOG_MCNN_CL_INF("sys call res = %d\n", sys_call_res);
    if (sys_call_res) {
        printf("set_cpu_affinity errno = %d\n", (int)errno);
    }
#endif
}
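// On iOS there is no main(); the app calls iosBenchAll() with the model
// directory instead, and switches the backend to MNN_FORWARD_NN.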
#if TARGET_OS_IPHONE
void iosBenchAll(const char* modelPath) {
    std::cout << "MNN benchmark" << std::endl;
    int loop   = 20;
    int warmup = 10;
    MNNForwardType forward = MNN_FORWARD_CPU;
    forward = MNN_FORWARD_NN;
    int numberThread = 4;
    int precision    = 2;
    std::cout << "Forward type: **" << forwardType(forward) << "** thread=" << numberThread << "** precision=" << precision << std::endl;
    std::vector<Model> models = findModelFiles(modelPath);
    std::cout << "--------> Benchmarking... loop = " << loop << ", warmup = " << warmup << std::endl;
    for (auto& m : models) {
        std::vector<float> costs = doBench(m, loop, warmup, forward, false, numberThread, precision);
        displayStats(m.name, costs);
    }
}
#else
int main(int argc, const char* argv[]) {
    std::cout << "MNN benchmark" << std::endl;
    int loop   = 10;
    int warmup = 10;
    MNNForwardType forward = MNN_FORWARD_CPU;
    int testQuantizedModel = 0;
    int numberThread       = 4;
    int precision          = 2;
    float sparsity         = 0.0f;
    int sparseBlockOC      = 1;
    bool enableKleidiAI    = false;
    if (argc <= 2) {
        std::cout << "Usage: " << argv[0] << " models_folder [loop_count] [warmup] [forwardtype] [numberThread] [precision] [weightSparsity] [testQuantizedModel] [enableKleidiAI]" << std::endl;
        return 1;
    }
    if (argc >= 3) {
        loop = atoi(argv[2]);
    }
    if (argc >= 4) {
        warmup = atoi(argv[3]);
    }
    if (argc >= 5) {
        forward = static_cast<MNNForwardType>(atoi(argv[4]));
    }
    if (argc >= 6) {
        numberThread = atoi(argv[5]);
    }
    if (argc >= 7) {
        precision = atoi(argv[6]);
    }
    if (argc >= 8) {
        sparsity = atof(argv[7]);
    }
    if (argc >= 9) {
        sparseBlockOC = atoi(argv[8]);
    }
    if (argc >= 10) {
        testQuantizedModel = atoi(argv[9]);
    }
    if (argc >= 11) {
        enableKleidiAI = atoi(argv[10]) > 0;
    }
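    // Example invocation (hypothetical paths/values):
    //   ./benchmark.out ../models 20 10 0 4 2
    // runs every model in ../models for 20 timed loops after 10 warmup runs,
    // on the CPU backend (forwardtype 0) with 4 threads and precision mode 2.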
std::cout << "Forward type: " << forwardType(forward) << " thread=" << numberThread << " precision=" <<precision << " sparsity=" <<sparsity << " sparseBlockOC=" << sparseBlockOC << " testQuantizedModel=" << testQuantizedModel << " enableKleidiAI=" << enableKleidiAI << std::endl;
std::vector<Model> models = findModelFiles(argv[1]);
std::cout << "--------> Benchmarking... loop = " << argv[2] << ", warmup = " << warmup << std::endl;
std::string fpInfType = "precision!=2, use fp32 inference.";
if (precision == 2) {
fpInfType = "precision=2, use fp16 inference if your device supports and open MNN_ARM82=ON.";
}
MNN_PRINT("[-INFO-]: %s\n", fpInfType.c_str());
if (testQuantizedModel) {
MNN_PRINT("[-INFO-]: Auto set sparsity=0 when test quantized model in benchmark...\n");
}
    /* not called yet */
    // set_cpu_affinity();
    for (auto& m : models) {
        std::vector<float> costs = doBench(m, loop, warmup, forward, false, numberThread, precision, sparsity, sparseBlockOC, false, enableKleidiAI);
        displayStats(m.name, costs, 0);
        if (testQuantizedModel) {
            costs = doBench(m, loop, warmup, forward, false, numberThread, precision, sparsity, sparseBlockOC, true, enableKleidiAI);
            displayStats(m.name, costs, 1);
        }
    }
    return 0;
}
#endif