// mirror of https://github.com/alibaba/MNN.git
//
//  benchmark.cpp
//  MNN
//
//  Created by MNN on 2019/01/31.
//  Copyright © 2018, Alibaba Group Holding Limited
//

#include <errno.h>
#include <float.h>
#include <math.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <cstring>
#include <fstream>
#include <iostream>
#include <vector>
#if defined(_MSC_VER)
#include <Windows.h>
#undef min
#undef max
#else
#include <sys/time.h>
#include <sys/stat.h>
#include <sys/types.h>
#include <dirent.h>
#endif

#include "core/Backend.hpp"
#include <MNN/Interpreter.hpp>
#include <MNN/MNNDefine.h>
#include <MNN/Tensor.hpp>
#include <MNN/AutoTime.hpp>
#include "revertMNNModel.hpp"

/**
 TODOs:
 1. dynamically get CPU related info.
 2. iOS support
 */
// A model to benchmark: display name plus the path of its .mnn file.
struct Model {
    std::string name;
    std::string model_file;
};

#if !defined(_MSC_VER)
inline bool file_exist(const char* file) {
    struct stat buffer;
    return stat(file, &buffer) == 0;
}
#endif
// Collect candidate model files from `dir`. On Windows only *.mnn files are
// matched; on POSIX every non-hidden entry is taken.
std::vector<Model> findModelFiles(const char* dir) {
    std::vector<Model> models;
#if defined(_MSC_VER)
    WIN32_FIND_DATA ffd;
    HANDLE hFind = INVALID_HANDLE_VALUE;
    std::string mnn_model_pattern = std::string(dir) + "\\*.mnn";
    hFind = FindFirstFile(mnn_model_pattern.c_str(), &ffd);
    if (INVALID_HANDLE_VALUE == hFind) {
        std::cout << "open " << dir << " failed: " << strerror(errno) << std::endl;
        return models;
    }
    do {
        Model m;
        m.name       = ffd.cFileName;
        m.model_file = std::string(dir) + "\\" + m.name;
        if (INVALID_FILE_ATTRIBUTES != GetFileAttributes(m.model_file.c_str()) && GetLastError() != ERROR_FILE_NOT_FOUND) {
            models.push_back(std::move(m));
        }
    } while (FindNextFile(hFind, &ffd) != 0);
    FindClose(hFind);
#else
    DIR* root;
    if ((root = opendir(dir)) == NULL) {
        std::cout << "open " << dir << " failed: " << strerror(errno) << std::endl;
        return models;
    }

    struct dirent* ent;
    while ((ent = readdir(root)) != NULL) {
        Model m;
        if (ent->d_name[0] != '.') {
            m.name       = ent->d_name;
            m.model_file = std::string(dir) + "/" + m.name;
            if (file_exist(m.model_file.c_str())) {
                models.push_back(std::move(m));
            }
        }
    }
    closedir(root);
#endif
    return models;
}
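
// Illustrative usage of findModelFiles (the folder name is hypothetical):
//
//     for (const auto& m : findModelFiles("./models")) {
//         std::cout << m.name << " -> " << m.model_file << std::endl;
//     }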
// Fill a host tensor with random values so each run has input data.
void setInputData(MNN::Tensor* tensor) {
    float* data = tensor->host<float>();
    Revert::fillRandValue(data, tensor->elementSize());
}
// Portable microsecond timestamp (currently unused in this file; doBench
// times runs with MNN::Timer instead).
static inline uint64_t getTimeInUs() {
    uint64_t time;
#if defined(_MSC_VER)
    LARGE_INTEGER now, freq;
    QueryPerformanceCounter(&now);
    QueryPerformanceFrequency(&freq);
    uint64_t sec  = now.QuadPart / freq.QuadPart;
    uint64_t usec = (now.QuadPart % freq.QuadPart) * 1000000 / freq.QuadPart;
    time = sec * 1000000 + usec;
#else
    struct timeval tv;
    gettimeofday(&tv, nullptr);
    time = static_cast<uint64_t>(tv.tv_sec) * 1000000 + tv.tv_usec;
#endif
    return time;
}
// Benchmark a single model: `warmup` untimed runs followed by `loop` timed
// runs. Returns the per-run latencies in milliseconds.
std::vector<float> doBench(Model& model, int loop, int warmup = 10, int forward = MNN_FORWARD_CPU, bool only_inference = true,
                           int numberThread = 4, int precision = 2, float sparsity = 0.0f, int sparseBlockOC = 1, bool testQuantModel = false, bool enableKleidiAI = false) {
    auto revertor = std::unique_ptr<Revert>(new Revert(model.model_file.c_str()));
    if (testQuantModel) {
        revertor->initialize(0, sparseBlockOC, false, true);
    } else {
        revertor->initialize(sparsity, sparseBlockOC);
    }

    auto modelBuffer      = revertor->getBuffer();
    const auto bufferSize = revertor->getBufferSize();
    auto net = std::shared_ptr<MNN::Interpreter>(MNN::Interpreter::createFromBuffer(modelBuffer, bufferSize), MNN::Interpreter::destroy);
    revertor.reset();
    net->setSessionMode(MNN::Interpreter::Session_Release);
    net->setSessionHint(MNN::Interpreter::HintMode::CPU_ENABLE_KLEIDIAI, enableKleidiAI);
    MNN::ScheduleConfig config;
    config.numThread = numberThread;
    config.type      = static_cast<MNNForwardType>(forward);
    MNN::BackendConfig backendConfig;
    backendConfig.precision = (MNN::BackendConfig::PrecisionMode)precision;
    backendConfig.power     = MNN::BackendConfig::Power_High;
    config.backendConfig    = &backendConfig;

    std::vector<float> costs;
    MNN::Session* session = net->createSession(config);

    MNN::Tensor* input = net->getSessionInput(session, NULL);

    // If the model does not carry input dimensions, uncomment the code below
    // to set them explicitly.
    // std::vector<int> dims{1, 3, 224, 224};
    // net->resizeTensor(input, dims);
    // net->resizeSession(session);

    net->releaseModel();

    const MNN::Backend* inBackend = net->getBackend(session, input);

    std::shared_ptr<MNN::Tensor> givenTensor(MNN::Tensor::createHostTensorFromDevice(input, false));

    auto outputTensor = net->getSessionOutput(session, NULL);
    std::shared_ptr<MNN::Tensor> expectTensor(MNN::Tensor::createHostTensorFromDevice(outputTensor, false));
    // Warming up...
    for (int i = 0; i < warmup; ++i) {
        void* host = input->map(MNN::Tensor::MAP_TENSOR_WRITE, input->getDimensionType());
        input->unmap(MNN::Tensor::MAP_TENSOR_WRITE, input->getDimensionType(), host);

        net->runSession(session);

        host = outputTensor->map(MNN::Tensor::MAP_TENSOR_READ, outputTensor->getDimensionType());
        outputTensor->unmap(MNN::Tensor::MAP_TENSOR_READ, outputTensor->getDimensionType(), host);
    }

    for (int round = 0; round < loop; round++) {
        MNN::Timer _t;
        void* host = input->map(MNN::Tensor::MAP_TENSOR_WRITE, input->getDimensionType());
        input->unmap(MNN::Tensor::MAP_TENSOR_WRITE, input->getDimensionType(), host);
        net->runSession(session);
        host = outputTensor->map(MNN::Tensor::MAP_TENSOR_READ, outputTensor->getDimensionType());
        outputTensor->unmap(MNN::Tensor::MAP_TENSOR_READ, outputTensor->getDimensionType(), host);
        auto time = (float)_t.durationInUs() / 1000.0f;
        costs.push_back(time);
    }
    return costs;
}
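
// Illustrative usage of doBench (not part of the original flow; the model
// file name below is hypothetical):
//
//     Model m;
//     m.name       = "mobilenet-v2.mnn";
//     m.model_file = "./models/mobilenet-v2.mnn";
//     std::vector<float> costs = doBench(m, /*loop*/ 10, /*warmup*/ 10,
//                                        MNN_FORWARD_CPU, /*only_inference*/ true,
//                                        /*numberThread*/ 4, /*precision*/ 2);
//     displayStats(m.name, costs);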
void displayStats(const std::string& name, const std::vector<float>& costs, int quant = 0) {
    float max = 0, min = FLT_MAX, sum = 0, avg;
    for (auto v : costs) {
        max = fmax(max, v);
        min = fmin(min, v);
        sum += v;
        // printf("[ - ] cost:%f ms\n", v);
    }
    avg = costs.size() > 0 ? sum / costs.size() : 0;
    std::string model = name;
    if (quant == 1) {
        model = "quant-" + name;
    }
    // With no samples, avg is 0 and min still holds FLT_MAX; print 0 instead.
    printf("[ - ] %-24s    max = %8.3f ms  min = %8.3f ms  avg = %8.3f ms\n", model.c_str(), max, avg == 0 ? 0 : min, avg);
}
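
// Example output line (illustrative numbers only):
//
//     [ - ] mobilenet-v2.mnn            max =   12.345 ms  min =   10.012 ms  avg =   11.003 ms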
static inline std::string forwardType(MNNForwardType type) {
    switch (type) {
        case MNN_FORWARD_CPU:
            return "CPU";
        case MNN_FORWARD_VULKAN:
            return "Vulkan";
        case MNN_FORWARD_OPENCL:
            return "OpenCL";
        case MNN_FORWARD_METAL:
            return "Metal";
        default:
            break;
    }
    return "N/A";
}

#ifdef __ANDROID__
#include <errno.h>
#include <unistd.h>
#include <sys/prctl.h>
#include <sys/syscall.h>

#define BUFFER_SIZE 1024

// Count logical CPUs by counting "processor" entries in /proc/cpuinfo.
static uint32_t getNumberOfCPU() {
    FILE* fp = fopen("/proc/cpuinfo", "rb");
    if (!fp) {
        return 1;
    }
    uint32_t number = 0;
    char buffer[BUFFER_SIZE];
    while (!feof(fp)) {
        char* str = fgets(buffer, BUFFER_SIZE, fp);
        if (!str) {
            break;
        }
        if (memcmp(buffer, "processor", 9) == 0) {
            number++;
        }
    }
    fclose(fp);
    if (number < 1) {
        number = 1;
    }
    return number;
}
// Read the maximum frequency (in kHz) of CPU `cpuID`, trying the
// time_in_state stats first and falling back to cpuinfo_max_freq.
// Returns -1 on failure.
static int getCPUMaxFreqKHz(int cpuID) {
    char path[256];
    sprintf(path, "/sys/devices/system/cpu/cpufreq/stats/cpu%d/time_in_state", cpuID);
    FILE* fp = fopen(path, "rb");
    if (!fp) {
        sprintf(path, "/sys/devices/system/cpu/cpu%d/cpufreq/stats/time_in_state", cpuID);
        fp = fopen(path, "rb");
        if (!fp) {
            sprintf(path, "/sys/devices/system/cpu/cpu%d/cpufreq/cpuinfo_max_freq", cpuID);
            fp = fopen(path, "rb");
            if (!fp) {
                return -1;
            }
            int maxfrequency = -1;
            fscanf(fp, "%d", &maxfrequency);
            fclose(fp);
            return maxfrequency;
        }
    }
    int maxfrequency = 0;
    while (!feof(fp)) {
        int frequency = 0;
        int history   = fscanf(fp, "%d %*d", &frequency);
        if (history != 1) {
            break;
        }
        if (frequency > maxfrequency) {
            maxfrequency = frequency;
        }
    }
    fclose(fp);
    return maxfrequency;
}
// Sort CPU IDs in descending order of max frequency and report in
// *littleClusterOffset the index where the little cluster starts.
static int sortCPUIDByMaxFrequency(std::vector<int>& cpuIDs, int* littleClusterOffset) {
    const int cpuNumbers = cpuIDs.size();
    *littleClusterOffset = 0;
    if (cpuNumbers == 0) {
        return 0;
    }
    std::vector<int> cpusFrequency;
    cpusFrequency.resize(cpuNumbers);
    for (int i = 0; i < cpuNumbers; ++i) {
        int frequency    = getCPUMaxFreqKHz(i);
        cpuIDs[i]        = i;
        cpusFrequency[i] = frequency;
        // MNN_PRINT("cpu fre: %d, %d\n", i, frequency);
    }
    for (int i = 0; i < cpuNumbers; ++i) {
        for (int j = i + 1; j < cpuNumbers; ++j) {
            if (cpusFrequency[i] < cpusFrequency[j]) {
                // id
                int temp  = cpuIDs[i];
                cpuIDs[i] = cpuIDs[j];
                cpuIDs[j] = temp;
                // frequency
                temp             = cpusFrequency[i];
                cpusFrequency[i] = cpusFrequency[j];
                cpusFrequency[j] = temp;
            }
        }
    }
    // If the midpoint collapses onto the lowest frequency, all cores
    // effectively form one cluster and there is nothing to split off.
    int midMaxFrequency = (cpusFrequency.front() + cpusFrequency.back()) / 2;
    if (midMaxFrequency == cpusFrequency.back()) {
        return 0;
    }
    for (int i = 0; i < cpuNumbers; ++i) {
        if (cpusFrequency[i] < midMaxFrequency) {
            *littleClusterOffset = i;
            break;
        }
    }
    return 0;
}
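
// Worked example for sortCPUIDByMaxFrequency (assumed frequencies, not
// measured): with per-core max frequencies {2800000, 1800000, 2800000,
// 1800000} kHz, the sorted IDs come out as {0, 2, 1, 3}, midMaxFrequency is
// (2800000 + 1800000) / 2 = 2300000, and littleClusterOffset becomes 2: the
// first two sorted IDs form the big cluster, the remaining two the little one.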

//#define CPU_SETSIZE 1024
#define __NCPUBITS (8 * sizeof(unsigned long))

#endif
// Pin the current thread to the big (highest-frequency) cores on Android.
void set_cpu_affinity() {
#ifdef __ANDROID__
    int cpu_core_num = sysconf(_SC_NPROCESSORS_CONF);
    // LOG_MCNN_CL_INF("cpu core num = %d\n", cpu_core_num);
    int cpu_id = 0;
    cpu_set_t mask;
    CPU_ZERO(&mask);

    auto numberOfCPUs = getNumberOfCPU();
    static std::vector<int> sortedCPUIDs;
    static int littleClusterOffset = 0;
    if (sortedCPUIDs.empty()) {
        sortedCPUIDs.resize(numberOfCPUs);
        for (int i = 0; i < numberOfCPUs; ++i) {
            sortedCPUIDs[i] = i;
        }
        sortCPUIDByMaxFrequency(sortedCPUIDs, &littleClusterOffset);
    }

    // The first littleClusterOffset sorted IDs are the big cores. Note that
    // when littleClusterOffset is 0 the mask stays empty and the syscall fails.
    printf("max core:");
    for (cpu_id = 0; cpu_id < littleClusterOffset; cpu_id++) {
        printf("%d ", sortedCPUIDs[cpu_id]);
        CPU_SET(sortedCPUIDs[cpu_id], &mask);
    }
    printf("\n");

    int sys_call_res = syscall(__NR_sched_setaffinity, gettid(), sizeof(mask), &mask);
    // LOG_MCNN_CL_INF("sys call res = %d\n", sys_call_res);
    if (sys_call_res) {
        printf("set_cpu_affinity errno = %d\n", (int)errno);
    }
#endif
}
#if TARGET_OS_IPHONE
void iosBenchAll(const char* modelPath) {
    std::cout << "MNN benchmark" << std::endl;
    int loop               = 20;
    int warmup             = 10;
    MNNForwardType forward = MNN_FORWARD_CPU;
    forward = MNN_FORWARD_NN;
    int numberThread = 4;
    int precision    = 2;
    std::cout << "Forward type: **" << forwardType(forward) << "** thread=" << numberThread << "** precision=" << precision << std::endl;
    std::vector<Model> models = findModelFiles(modelPath);
    std::cout << "--------> Benchmarking... loop = " << loop << ", warmup = " << warmup << std::endl;

    for (auto& m : models) {
        std::vector<float> costs = doBench(m, loop, warmup, forward, false, numberThread, precision);
        displayStats(m.name, costs);
    }
}
#else
int main(int argc, const char* argv[]) {
    std::cout << "MNN benchmark" << std::endl;
    int loop               = 10;
    int warmup             = 10;
    MNNForwardType forward = MNN_FORWARD_CPU;
    int testQuantizedModel = 0;
    int numberThread       = 4;
    int precision          = 2;
    float sparsity         = 0.0f;
    int sparseBlockOC      = 1;
    bool enableKleidiAI    = false;
    if (argc <= 2) {
        std::cout << "Usage: " << argv[0] << " models_folder [loop_count] [warmup] [forwardtype] [numberThread] [precision] [weightSparsity] [testQuantizedModel] [enableKleidiAI]" << std::endl;
        return 1;
    }
    if (argc >= 3) {
        loop = atoi(argv[2]);
    }
    if (argc >= 4) {
        warmup = atoi(argv[3]);
    }
    if (argc >= 5) {
        forward = static_cast<MNNForwardType>(atoi(argv[4]));
    }
    if (argc >= 6) {
        numberThread = atoi(argv[5]);
    }
    if (argc >= 7) {
        precision = atoi(argv[6]);
    }
    if (argc >= 8) {
        sparsity = atof(argv[7]);
    }
    if (argc >= 9) {
        sparseBlockOC = atoi(argv[8]);
    }
    if (argc >= 10) {
        testQuantizedModel = atoi(argv[9]);
    }
    if (argc >= 11) {
        enableKleidiAI = atoi(argv[10]) > 0;
    }

    std::cout << "Forward type: " << forwardType(forward) << " thread=" << numberThread << " precision=" << precision << " sparsity=" << sparsity << " sparseBlockOC=" << sparseBlockOC << " testQuantizedModel=" << testQuantizedModel << " enableKleidiAI=" << enableKleidiAI << std::endl;
    std::vector<Model> models = findModelFiles(argv[1]);

    std::cout << "--------> Benchmarking... loop = " << loop << ", warmup = " << warmup << std::endl;
    std::string fpInfType = "precision!=2, use fp32 inference.";
    if (precision == 2) {
        fpInfType = "precision=2, use fp16 inference if your device supports it and MNN_ARM82=ON was set.";
    }
    MNN_PRINT("[-INFO-]: %s\n", fpInfType.c_str());
    if (testQuantizedModel) {
        MNN_PRINT("[-INFO-]: Auto set sparsity=0 when testing quantized models in benchmark...\n");
    }

    /* not called yet */
    // set_cpu_affinity();

    for (auto& m : models) {
        std::vector<float> costs = doBench(m, loop, warmup, forward, false, numberThread, precision, sparsity, sparseBlockOC, false, enableKleidiAI);
        displayStats(m.name, costs, 0);
        if (testQuantizedModel) {
            costs = doBench(m, loop, warmup, forward, false, numberThread, precision, sparsity, sparseBlockOC, true, enableKleidiAI);
            displayStats(m.name, costs, 1);
        }
    }
    return 0;
}
#endif
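
// Example invocations (illustrative; binary and folder names depend on your
// build and environment):
//
//     ./benchmark.out ./models 10              # 10 timed loops, defaults otherwise
//     ./benchmark.out ./models 20 10 0 4 2     # loop=20, warmup=10, forward=0 (CPU),
//                                              # 4 threads, precision=2
//
// The forwardtype argument is cast to MNNForwardType; 0 selects the CPU
// backend (see the MNNForwardType enum for the other backend codes).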