| 
									
										
										
										
											2019-04-17 10:49:11 +08:00
										 |  |  | //
 | 
					
						
							|  |  |  | //  getPerformance.cpp
 | 
					
						
							|  |  |  | //  MNN
 | 
					
						
							|  |  |  | //
 | 
					
						
							|  |  |  | //  Created by MNN on 2019/03/12.
 | 
					
						
							|  |  |  | //  Copyright © 2018, Alibaba Group Holding Limited
 | 
					
						
							|  |  |  | //
 | 
					
						
							|  |  |  | 
 | 
					
						
							#include <stdio.h>
							#include <stdlib.h>
							#include <string.h>
							#include <chrono>
							#include <cstdint>
							#include <vector>
							#include "MNNDefine.h"
							#include "Macro.h"
							#ifdef MNN_USE_NEON
							#include <arm_neon.h>
							#endif
 | 
					
						
							| 
									
										
										
										
											2019-04-17 10:49:11 +08:00
										 |  |  | 
 | 
					
						
							/**
							 * Minimal stopwatch used by the benchmark below.
							 *
							 * Call startTimer() to mark the beginning of an interval and getCostTimer()
							 * to read the time elapsed since that mark. If startTimer() is never called,
							 * the interval starts when the Timer object was constructed.
							 */
							class Timer {
							private:
							    // steady_clock is monotonic, so the measured interval cannot go backwards or
							    // jump when the wall clock is adjusted (high_resolution_clock gives no such
							    // guarantee).
							    std::chrono::steady_clock::time_point inTime = std::chrono::steady_clock::now();
							    std::chrono::steady_clock::time_point outTime = inTime;

							public:
							    /** Marks "now" as the start of the interval to measure. */
							    void startTimer() {
							        inTime = std::chrono::steady_clock::now();
							    }

							    /**
							     * Returns the time elapsed since the last startTimer() call.
							     *
							     * unit: microseconds (the duration is cast to std::chrono::microseconds).
							     * The count is returned as float, so values above 2^24 us (about 16.7 s)
							     * are rounded to the nearest representable float.
							     */
							    float getCostTimer() {
							        outTime = std::chrono::steady_clock::now();
							        const auto elapsed = std::chrono::duration_cast<std::chrono::microseconds>(outTime - inTime);
							        return static_cast<float>(elapsed.count());
							    }
							};
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | int getCpuCounts() { | 
					
						
							|  |  |  |     FILE* fp = fopen("/proc/cpuinfo", "rb"); | 
					
						
							|  |  |  |     if (fp == nullptr) { | 
					
						
							|  |  |  |         MNN_PRINT("fopen error ! \n"); | 
					
						
							| 
									
										
										
										
											2019-08-07 16:44:09 +08:00
										 |  |  |         return 0; | 
					
						
							| 
									
										
										
										
											2019-04-17 10:49:11 +08:00
										 |  |  |     } | 
					
						
							|  |  |  |     int cpuCounts = 0; | 
					
						
							|  |  |  |     char data[1024]; | 
					
						
							|  |  |  |     while (!feof(fp)) { | 
					
						
							|  |  |  |         char* a = fgets(data, 1024, fp); | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  |         if (a == nullptr) { | 
					
						
							|  |  |  |             break; | 
					
						
							|  |  |  |         } | 
					
						
							|  |  |  |         if (memcmp(data, "processor", 9) == 0) { | 
					
						
							|  |  |  |             cpuCounts++; | 
					
						
							|  |  |  |         } | 
					
						
							|  |  |  |     } | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  |     fclose(fp); | 
					
						
							|  |  |  |     fp = nullptr; | 
					
						
							|  |  |  |     return cpuCounts; | 
					
						
							|  |  |  | } | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | // 0 max 1 min 2 cur
 | 
					
						
							|  |  |  | void getFreqKhz(int cpuid, std::vector<int>& freqVector) { | 
					
						
							|  |  |  |     char path[256]; | 
					
						
							|  |  |  |     int freqKhz = -1; | 
					
						
							|  |  |  |     // max
 | 
					
						
							|  |  |  |     sprintf(path, "/sys/devices/system/cpu/cpu%d/cpufreq/cpuinfo_max_freq", cpuid); | 
					
						
							|  |  |  |     FILE* fp = fopen(path, "rb"); | 
					
						
							|  |  |  |     if (nullptr == fp) { | 
					
						
							|  |  |  |         MNN_PRINT("cpuinfo_max_freq fopen error ! \n"); | 
					
						
							| 
									
										
										
										
											2019-08-07 16:44:09 +08:00
										 |  |  |         freqVector.emplace_back(0); | 
					
						
							| 
									
										
										
										
											2019-04-17 10:49:11 +08:00
										 |  |  |     } else { | 
					
						
							|  |  |  |         fscanf(fp, "%d", &freqKhz); | 
					
						
							|  |  |  |         fclose(fp); | 
					
						
							|  |  |  |         freqVector.push_back(freqKhz); | 
					
						
							|  |  |  |     } | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  |     // min
 | 
					
						
							|  |  |  |     sprintf(path, "/sys/devices/system/cpu/cpu%d/cpufreq/cpuinfo_min_freq", cpuid); | 
					
						
							|  |  |  |     fp = fopen(path, "rb"); | 
					
						
							|  |  |  |     if (nullptr == fp) { | 
					
						
							|  |  |  |         MNN_PRINT("cpuinfo_min_freq fopen error ! \n"); | 
					
						
							| 
									
										
										
										
											2019-08-07 16:44:09 +08:00
										 |  |  |         freqVector.emplace_back(0); | 
					
						
							| 
									
										
										
										
											2019-04-17 10:49:11 +08:00
										 |  |  |     } else { | 
					
						
							|  |  |  |         freqKhz = -1; | 
					
						
							|  |  |  |         fscanf(fp, "%d", &freqKhz); | 
					
						
							|  |  |  |         fclose(fp); | 
					
						
							|  |  |  |         freqVector.push_back(freqKhz); | 
					
						
							|  |  |  |     } | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  |     // cur
 | 
					
						
							|  |  |  |     // sprintf(path, "/sys/devices/system/cpu/cpu%d/cpufreq/cpuinfo_cur_freq", cpuid);
 | 
					
						
							|  |  |  |     // fp = fopen(path, "rb");
 | 
					
						
							|  |  |  |     // if(nullptr == fp){
 | 
					
						
							|  |  |  |     //     MNN_PRINT("cpuinfo_cur_freq fopen error ! \n");
 | 
					
						
							|  |  |  |     // }else{
 | 
					
						
							|  |  |  |     //     freqKhz = -1;
 | 
					
						
							|  |  |  |     //     fscanf(fp, "%d", &freqKhz);
 | 
					
						
							|  |  |  |     //     fclose(fp);
 | 
					
						
							|  |  |  |     //     freqVector.push_back(freqKhz);
 | 
					
						
							|  |  |  |     // }
 | 
					
						
							|  |  |  | } | 
					
						
							|  |  |  | 
 | 
					
						
							| 
									
										
										
										
											2019-08-22 20:13:46 +08:00
										 /**
										  * Runs a tight loop of independent NEON float multiply-accumulate
										  * instructions so the caller can time it.
										  *
										  * Each loop iteration issues 13 `vmla.f32` instructions on 32-bit ARM or
										  * 25 `fmla` instructions on AArch64, every one operating on a 4-lane float
										  * vector. When MNN_USE_NEON is not defined the function does no work.
										  *
										  * The vector registers are never loaded before the loop, so the arithmetic
										  * runs on whatever values they happen to hold.
										  * NOTE(review): that may influence timing on cores where special values
										  * are slower - confirm if results look off.
										  *
										  * @param loopCounts number of loop iterations; values <= 0 return immediately.
										  */
										 void cpuFloatMlaTest(int32_t loopCounts) {
										     // The loops below decrement first and test for zero afterwards, so a
										     // non-positive count would wrap around and spin for ~2^32 iterations.
										     if (loopCounts <= 0) {
										         return;
										     }
										 #ifdef MNN_USE_NEON
										 #ifndef __aarch64__

										     __asm__ __volatile__(
										         "mov r12, %0\n"
										         "0:                             \n"
										         "vmla.f32   q15, q15, d0[0]        \n"
										         "vmla.f32   q14, q14, d0[1]        \n"
										         "vmla.f32   q13, q13, d1[0]        \n"
										         "vmla.f32   q12, q12, d1[1]        \n"
										         "vmla.f32   q11, q11, d2[0]        \n"
										         "vmla.f32   q10, q10, d2[1]        \n"
										         "vmla.f32   q9, q9, d3[0]        \n"
										         "vmla.f32   q8, q8, d3[1]        \n"
										         "vmla.f32   q7, q7, d4[0]        \n"
										         "vmla.f32   q6, q6, d4[1]        \n"
										         "vmla.f32   q5, q5, d5[0]        \n"
										         "vmla.f32   q4, q4, d5[1]        \n"
										         "vmla.f32   q3, q3, d6[0]        \n"
										         "subs       r12, r12, #1          \n"
										         "bne        0b                  \n"
										         :
										         : "r"(loopCounts)
										         // Every register written above must be listed (q3-q15), otherwise
										         // the compiler may keep live values in them across the asm.
										         : "cc", "memory", "r12", "q0", "q1", "q2", "q3", "q4", "q5", "q6", "q7", "q8", "q9", "q10",
										           "q11", "q12", "q13", "q14", "q15"
										     );

										 #else
										     __asm__ __volatile__(
										         "mov w9, %w0\n"
										         "0:                           \n"
										         "fmla v31.4s, v31.4s, v0.s[0]\n"
										         "fmla v30.4s, v30.4s, v0.s[1]\n"
										         "fmla v29.4s, v29.4s, v0.s[2]\n"
										         "fmla v28.4s, v28.4s, v0.s[3]\n"
										         "fmla v27.4s, v27.4s, v1.s[0]\n"
										         "fmla v26.4s, v26.4s, v1.s[1]\n"
										         "fmla v25.4s, v25.4s, v1.s[2]\n"
										         "fmla v24.4s, v24.4s, v1.s[3]\n"
										         "fmla v23.4s, v23.4s, v3.s[0]\n"
										         "fmla v22.4s, v22.4s, v3.s[1]\n"
										         "fmla v21.4s, v21.4s, v3.s[2]\n"
										         "fmla v20.4s, v20.4s, v3.s[3]\n"
										         "fmla v19.4s, v19.4s, v4.s[0]\n"
										         "fmla v18.4s, v18.4s, v4.s[1]\n"
										         "fmla v17.4s, v17.4s, v4.s[2]\n"
										         "fmla v16.4s, v16.4s, v4.s[3]\n"
										         "fmla v15.4s, v15.4s, v5.s[0]\n"
										         "fmla v14.4s, v14.4s, v5.s[1]\n"
										         "fmla v13.4s, v13.4s, v5.s[2]\n"
										         "fmla v12.4s, v12.4s, v5.s[3]\n"
										         "fmla v11.4s, v11.4s, v6.s[0]\n"
										         "fmla v10.4s, v10.4s, v6.s[1]\n"
										         "fmla v9.4s, v9.4s, v6.s[2]\n"
										         "fmla v8.4s, v8.4s, v6.s[3]\n"
										         "fmla v7.4s, v7.4s, v2.s[0]\n"
										         "subs       w9, w9, #1          \n"
										         "bne        0b                  \n"
										         :
										         : "r"(loopCounts)
										         // Every register written above must be listed (v7-v31), otherwise
										         // the compiler may keep live values in them across the asm.
										         : "cc", "memory", "w9", "v7", "v8", "v9", "v10", "v11", "v12", "v13", "v14", "v15", "v16",
										           "v17", "v18", "v19", "v20", "v21", "v22", "v23", "v24", "v25", "v26", "v27", "v28", "v29",
										           "v30", "v31"
										     );
										 #endif
										 #endif
										 }
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | void cpuFLOPSPerformance() { | 
					
						
							| 
									
										
										
										
											2019-08-07 16:44:09 +08:00
										 |  |  |     int32_t loopCounts = 100000000; | 
					
						
							|  |  |  |     MNN_PRINT("CPU PERFORMANCE -> loopCounts : %d \n", loopCounts); | 
					
						
							| 
									
										
										
										
											2019-04-17 10:49:11 +08:00
										 |  |  | 
 | 
					
						
							|  |  |  |     std::vector<int> freqVector; | 
					
						
							|  |  |  |     for (int i = 0; i < getCpuCounts(); i++) { | 
					
						
							| 
									
										
										
										
											2019-08-07 16:44:09 +08:00
										 |  |  |         freqVector.clear(); | 
					
						
							| 
									
										
										
										
											2019-04-17 10:49:11 +08:00
										 |  |  |         getFreqKhz(i, freqVector); | 
					
						
							| 
									
										
										
										
											2019-08-07 16:44:09 +08:00
										 |  |  |         MNN_PRINT("core %d : max : %d, min : %d \n",i, freqVector.at(0), freqVector.at(1)); | 
					
						
							| 
									
										
										
										
											2019-04-17 10:49:11 +08:00
										 |  |  |     } | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  |     // warm up
 | 
					
						
							| 
									
										
										
										
											2019-05-05 20:27:57 +08:00
										 |  |  |     cpuFloatMlaTest(loopCounts); | 
					
						
							| 
									
										
										
										
											2019-04-17 10:49:11 +08:00
										 |  |  | 
 | 
					
						
							|  |  |  |     Timer timeInstance; | 
					
						
							|  |  |  |     timeInstance.startTimer(); | 
					
						
							| 
									
										
										
										
											2019-05-05 20:27:57 +08:00
										 |  |  |     cpuFloatMlaTest(loopCounts); | 
					
						
							| 
									
										
										
										
											2019-08-07 16:44:09 +08:00
										 |  |  | #ifdef MNN_USE_NEON
 | 
					
						
							|  |  |  | #ifndef __aarch64__
 | 
					
						
							|  |  |  |     auto number = (double)loopCounts * 13; | 
					
						
							|  |  |  | #else 
 | 
					
						
							|  |  |  |     auto number = (double)loopCounts * 25; | 
					
						
							|  |  |  | #endif
 | 
					
						
							|  |  |  | #else
 | 
					
						
							|  |  |  |     auto number = 0.0; | 
					
						
							|  |  |  | #endif
 | 
					
						
							|  |  |  |     //FUNC_PRINT(number);
 | 
					
						
							| 
									
										
										
										
											2019-04-17 10:49:11 +08:00
										 |  |  |     float costTime_ms = timeInstance.getCostTimer(); | 
					
						
							|  |  |  |     double costTime_s = (double)(costTime_ms) / 1000000.0f; | 
					
						
							|  |  |  |     // MNN_PRINT("cost time : %f \n", costTime_s);
 | 
					
						
							| 
									
										
										
										
											2019-08-07 16:44:09 +08:00
										 |  |  |     double mlaCounts_g = number * 4 / 1000000000.0f; | 
					
						
							| 
									
										
										
										
											2019-04-17 10:49:11 +08:00
										 |  |  |     float gflops       = mlaCounts_g / costTime_s; | 
					
						
							| 
									
										
										
										
											2019-05-05 20:27:57 +08:00
										 |  |  |     MNN_PRINT(" ======================== float ===============================\n"); | 
					
						
							| 
									
										
										
										
											2019-08-07 16:44:09 +08:00
										 |  |  |     MNN_PRINT("CPU float gflops : %f\n", gflops); | 
					
						
							| 
									
										
										
										
											2019-04-17 10:49:11 +08:00
										 |  |  | } | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | int main(int argc, const char* argv[]) { | 
					
						
							|  |  |  |     MNN_PRINT("Start PERFORMANCE !!! \n"); | 
					
						
							|  |  |  | 
 | 
					
						
							| 
									
										
										
										
											2019-05-05 20:27:57 +08:00
										 |  |  |     cpuFLOPSPerformance(); | 
					
						
							| 
									
										
										
										
											2019-04-17 10:49:11 +08:00
										 |  |  | 
 | 
					
						
							|  |  |  |     return 0; | 
					
						
							|  |  |  | } |