mirror of https://github.com/alibaba/MNN.git
				
				
				
			
		
			
				
	
	
		
			211 lines
		
	
	
		
			5.9 KiB
		
	
	
	
		
			C++
		
	
	
	
			
		
		
	
	
			211 lines
		
	
	
		
			5.9 KiB
		
	
	
	
		
			C++
		
	
	
	
| //
 | |
| //  getPerformance.cpp
 | |
| //  MNN
 | |
| //
 | |
| //  Created by MNN on 2019/03/12.
 | |
| //  Copyright © 2018, Alibaba Group Holding Limited
 | |
| //
 | |
| 
 | |
| #include <string.h>
 | |
| #include <chrono>
 | |
| #include <cstdint>
 | |
| #include <vector>
 | |
| #include <stdlib.h>
 | |
| #include "MNNDefine.h"
 | |
| #include "Macro.h"
 | |
| #ifdef MNN_USE_NEON
 | |
| #include <arm_neon.h>
 | |
| #endif
 | |
| 
 | |
/// Minimal stopwatch used to time the benchmark loop.
///
/// Usage: call startTimer(), run the work, then call getCostTimer().
class Timer {
private:
    // steady_clock is guaranteed monotonic; high_resolution_clock may be an
    // alias of system_clock, which can jump when the wall clock is adjusted
    // and would then corrupt an interval measurement.
    std::chrono::steady_clock::time_point inTime, outTime;

public:
    /// Records the current instant as the start of the measured interval.
    void startTimer() {
        inTime = std::chrono::steady_clock::now();
    }

    /// Returns the time elapsed since the last startTimer() call.
    ///
    /// Unit: MICROSECONDS (truncated to a whole number of microseconds, then
    /// converted to float). The start point is not reset, so the method can
    /// be called repeatedly to obtain growing readings.
    float getCostTimer() {
        outTime = std::chrono::steady_clock::now();
        const auto elapsed = std::chrono::duration_cast<std::chrono::microseconds>(outTime - inTime);
        return static_cast<float>(elapsed.count());
    }
};
 | |
| 
 | |
| int getCpuCounts() {
 | |
|     FILE* fp = fopen("/proc/cpuinfo", "rb");
 | |
|     if (fp == nullptr) {
 | |
|         MNN_PRINT("fopen error ! \n");
 | |
|         return 0;
 | |
|     }
 | |
|     int cpuCounts = 0;
 | |
|     char data[1024];
 | |
|     while (!feof(fp)) {
 | |
|         char* a = fgets(data, 1024, fp);
 | |
| 
 | |
|         if (a == nullptr) {
 | |
|             break;
 | |
|         }
 | |
|         if (memcmp(data, "processor", 9) == 0) {
 | |
|             cpuCounts++;
 | |
|         }
 | |
|     }
 | |
| 
 | |
|     fclose(fp);
 | |
|     fp = nullptr;
 | |
|     return cpuCounts;
 | |
| }
 | |
| 
 | |
| // 0 max 1 min 2 cur
 | |
| void getFreqKhz(int cpuid, std::vector<int>& freqVector) {
 | |
|     char path[256];
 | |
|     int freqKhz = -1;
 | |
|     // max
 | |
|     sprintf(path, "/sys/devices/system/cpu/cpu%d/cpufreq/cpuinfo_max_freq", cpuid);
 | |
|     FILE* fp = fopen(path, "rb");
 | |
|     if (nullptr == fp) {
 | |
|         MNN_PRINT("cpuinfo_max_freq fopen error ! \n");
 | |
|         freqVector.emplace_back(0);
 | |
|     } else {
 | |
|         fscanf(fp, "%d", &freqKhz);
 | |
|         fclose(fp);
 | |
|         freqVector.push_back(freqKhz);
 | |
|     }
 | |
| 
 | |
|     // min
 | |
|     sprintf(path, "/sys/devices/system/cpu/cpu%d/cpufreq/cpuinfo_min_freq", cpuid);
 | |
|     fp = fopen(path, "rb");
 | |
|     if (nullptr == fp) {
 | |
|         MNN_PRINT("cpuinfo_min_freq fopen error ! \n");
 | |
|         freqVector.emplace_back(0);
 | |
|     } else {
 | |
|         freqKhz = -1;
 | |
|         fscanf(fp, "%d", &freqKhz);
 | |
|         fclose(fp);
 | |
|         freqVector.push_back(freqKhz);
 | |
|     }
 | |
| 
 | |
|     // cur
 | |
|     // sprintf(path, "/sys/devices/system/cpu/cpu%d/cpufreq/cpuinfo_cur_freq", cpuid);
 | |
|     // fp = fopen(path, "rb");
 | |
|     // if(nullptr == fp){
 | |
|     //     MNN_PRINT("cpuinfo_cur_freq fopen error ! \n");
 | |
|     // }else{
 | |
|     //     freqKhz = -1;
 | |
|     //     fscanf(fp, "%d", &freqKhz);
 | |
|     //     fclose(fp);
 | |
|     //     freqVector.push_back(freqKhz);
 | |
|     // }
 | |
| }
 | |
| 
 | |
/// Executes a tight loop of independent NEON float multiply-accumulate
/// instructions, used as the workload for the peak-throughput measurement.
///
/// Per loop iteration the ARMv7 variant issues 13 `vmla.f32` instructions
/// (q3..q15) and the AArch64 variant issues 25 `fmla` instructions (v7..v31),
/// each operating on 4 float lanes. The registers are not initialised and the
/// results are discarded; only the instruction stream matters.
///
/// When MNN_USE_NEON is not defined the function does nothing.
///
/// @param loopCounts Number of loop iterations. Values <= 0 return
///                   immediately: the assembly decrements before testing, so
///                   a zero or negative count would otherwise wrap around and
///                   spin for roughly 2^32 iterations.
void cpuFloatMlaTest(int32_t loopCounts) {
    if (loopCounts <= 0) {
        return;
    }
#ifdef MNN_USE_NEON
#ifndef __aarch64__

    __asm__ __volatile__(
        "mov r12, %0\n"
        "0:                             \n"
        "vmla.f32   q15, q15, d0[0]        \n"
        "vmla.f32   q14, q14, d0[1]        \n"
        "vmla.f32   q13, q13, d1[0]        \n"
        "vmla.f32   q12, q12, d1[1]        \n"
        "vmla.f32   q11, q11, d2[0]        \n"
        "vmla.f32   q10, q10, d2[1]        \n"
        "vmla.f32   q9, q9, d3[0]        \n"
        "vmla.f32   q8, q8, d3[1]        \n"
        "vmla.f32   q7, q7, d4[0]        \n"
        "vmla.f32   q6, q6, d4[1]        \n"
        "vmla.f32   q5, q5, d5[0]        \n"
        "vmla.f32   q4, q4, d5[1]        \n"
        "vmla.f32   q3, q3, d6[0]        \n"
        "subs       r12, r12, #1          \n"
        "bne        0b                  \n"
        :
        : "r"(loopCounts)
        // Every register the loop writes (q3..q15) must be declared, otherwise
        // the compiler may keep live values in them across the asm statement.
        : "cc", "memory", "r12", "q0", "q1", "q2", "q3", "q4", "q5", "q6", "q7", "q8", "q9", "q10", "q11", "q12",
          "q13", "q14", "q15"
    );

#else
    __asm__ __volatile__(
        "mov w9, %w0\n"
        "0:                           \n"
        "fmla v31.4s, v31.4s, v0.s[0]\n"
        "fmla v30.4s, v30.4s, v0.s[1]\n"
        "fmla v29.4s, v29.4s, v0.s[2]\n"
        "fmla v28.4s, v28.4s, v0.s[3]\n"
        "fmla v27.4s, v27.4s, v1.s[0]\n"
        "fmla v26.4s, v26.4s, v1.s[1]\n"
        "fmla v25.4s, v25.4s, v1.s[2]\n"
        "fmla v24.4s, v24.4s, v1.s[3]\n"
        "fmla v23.4s, v23.4s, v3.s[0]\n"
        "fmla v22.4s, v22.4s, v3.s[1]\n"
        "fmla v21.4s, v21.4s, v3.s[2]\n"
        "fmla v20.4s, v20.4s, v3.s[3]\n"
        "fmla v19.4s, v19.4s, v4.s[0]\n"
        "fmla v18.4s, v18.4s, v4.s[1]\n"
        "fmla v17.4s, v17.4s, v4.s[2]\n"
        "fmla v16.4s, v16.4s, v4.s[3]\n"
        "fmla v15.4s, v15.4s, v5.s[0]\n"
        "fmla v14.4s, v14.4s, v5.s[1]\n"
        "fmla v13.4s, v13.4s, v5.s[2]\n"
        "fmla v12.4s, v12.4s, v5.s[3]\n"
        "fmla v11.4s, v11.4s, v6.s[0]\n"
        "fmla v10.4s, v10.4s, v6.s[1]\n"
        "fmla v9.4s, v9.4s, v6.s[2]\n"
        "fmla v8.4s, v8.4s, v6.s[3]\n"
        "fmla v7.4s, v7.4s, v2.s[0]\n"
        "subs       w9, w9, #1          \n"
        "bne        0b                  \n"
        :
        : "r"(loopCounts)
        // Every register the loop writes (v7..v31) must be declared, otherwise
        // the compiler may keep live values in them across the asm statement.
        : "cc", "memory", "w9", "v7", "v8", "v9", "v10", "v11", "v12", "v13", "v14", "v15", "v16", "v17", "v18",
          "v19", "v20", "v21", "v22", "v23", "v24", "v25", "v26", "v27", "v28", "v29", "v30", "v31"
    );
#endif
#endif
}
 | |
| 
 | |
| void cpuFLOPSPerformance() {
 | |
|     int32_t loopCounts = 100000000;
 | |
|     MNN_PRINT("CPU PERFORMANCE -> loopCounts : %d \n", loopCounts);
 | |
| 
 | |
|     std::vector<int> freqVector;
 | |
|     for (int i = 0; i < getCpuCounts(); i++) {
 | |
|         freqVector.clear();
 | |
|         getFreqKhz(i, freqVector);
 | |
|         MNN_PRINT("core %d : max : %d, min : %d \n",i, freqVector.at(0), freqVector.at(1));
 | |
|     }
 | |
| 
 | |
|     // warm up
 | |
|     cpuFloatMlaTest(loopCounts);
 | |
| 
 | |
|     Timer timeInstance;
 | |
|     timeInstance.startTimer();
 | |
|     cpuFloatMlaTest(loopCounts);
 | |
| #ifdef MNN_USE_NEON
 | |
| #ifndef __aarch64__
 | |
|     auto number = (double)loopCounts * 13;
 | |
| #else 
 | |
|     auto number = (double)loopCounts * 25;
 | |
| #endif
 | |
| #else
 | |
|     auto number = 0.0;
 | |
| #endif
 | |
|     //FUNC_PRINT(number);
 | |
|     float costTime_ms = timeInstance.getCostTimer();
 | |
|     double costTime_s = (double)(costTime_ms) / 1000000.0f;
 | |
|     // MNN_PRINT("cost time : %f \n", costTime_s);
 | |
|     double mlaCounts_g = number * 4 / 1000000000.0f;
 | |
|     float gflops       = mlaCounts_g / costTime_s;
 | |
|     MNN_PRINT(" ======================== float ===============================\n");
 | |
|     MNN_PRINT("CPU float gflops : %f\n", gflops);
 | |
| }
 | |
| 
 | |
| int main(int argc, const char* argv[]) {
 | |
|     MNN_PRINT("Start PERFORMANCE !!! \n");
 | |
| 
 | |
|     cpuFLOPSPerformance();
 | |
| 
 | |
|     return 0;
 | |
| }
 |