//
//  CPURuntime.cpp
//  MNN
//
//  Created by MNN on 2018/08/31.
//  Copyright © 2018, Alibaba Group Holding Limited
//
/**
 Ref from:
 https://github.com/Tencent/ncnn/blob/master/src/cpu.cpp
 https://github.com/pytorch/cpuinfo
 */
#ifdef __ANDROID__
#include <stdint.h>
#include <sys/syscall.h>
#include <unistd.h>
#endif
#include "core/Macro.h"
#ifdef __ANDROID__
#include <fcntl.h>
2020-07-04 01:21:30 +08:00
#include <sys/auxv.h>
#include <sys/system_properties.h>
#endif // __ANDROID__
2019-04-17 10:49:11 +08:00
#if __APPLE__
#include "TargetConditionals.h"
#if __aarch64__
#include <sys/sysctl.h>
#endif
2019-04-17 10:49:11 +08:00
#if TARGET_OS_IPHONE
#include <mach/machine.h>
2020-07-04 01:21:30 +08:00
#include <sys/types.h>
2019-04-17 10:49:11 +08:00
#define __IOS__ 1
#endif // TARGET_OS_IPHONE
#endif // __APPLE__
2019-04-17 10:49:11 +08:00
#ifdef _OPENMP
#include <omp.h>
#endif // _OPENMP
2020-07-04 01:21:30 +08:00
#include <MNN/MNNDefine.h>
2019-04-17 10:49:11 +08:00
#include <stdio.h>
#include <string.h>
#include <algorithm>
2020-07-04 01:21:30 +08:00
#include <vector>
2019-12-27 22:16:57 +08:00
#include "backend/cpu/CPURuntime.hpp"
2019-04-17 10:49:11 +08:00
#if defined (__linux__) && defined (__aarch64__)
#include <sys/auxv.h>
#define CPUINFO_ARM_LINUX_FEATURE_FPHP UINT32_C(0x00000200)
#define CPUINFO_ARM_LINUX_FEATURE_ASIMDHP UINT32_C(0x00000400)
#define CPUINFO_ARM_LINUX_FEATURE_ASIMDDP UINT32_C(0x00100000)
#define CPUINFO_ARM_LINUX_FEATURE_I8MM UINT32_C(0x00002000)
#define CPUINFO_ARM_LINUX_FEATURE_SVE UINT32_C(0x00400000)
#define CPUINFO_ARM_LINUX_FEATURE_SVE2 UINT32_C(0x00000002)
#endif /* __linux__ && __aarch64__ */
2019-04-17 10:49:11 +08:00
#ifdef __ANDROID__
/* As per include/sys/system_properties.h in Android NDK */
#define CPUINFO_HARDWARE_VALUE_MAX 64
#define CPUINFO_BUILD_PROP_VALUE_MAX 92

/**
 * Raw text inputs used to identify the chipset on Android.
 *
 * Every member is a fixed-size character buffer. The `ro_*` members are
 * filled from the Android system property of the matching name by
 * cpuinfo_arm_android_parse_properties().
 */
struct cpuinfo_android_properties {
    /* presumably the "Hardware" value of /proc/cpuinfo - TODO confirm against the caller */
    char proc_cpuinfo_hardware[CPUINFO_HARDWARE_VALUE_MAX];
    /* "ro.product.board" */
    char ro_product_board[CPUINFO_BUILD_PROP_VALUE_MAX];
    /* "ro.board.platform" */
    char ro_board_platform[CPUINFO_BUILD_PROP_VALUE_MAX];
    /* "ro.mediatek.platform" */
    char ro_mediatek_platform[CPUINFO_BUILD_PROP_VALUE_MAX];
    /* "ro.arch" */
    char ro_arch[CPUINFO_BUILD_PROP_VALUE_MAX];
    /* "ro.chipname" */
    char ro_chipname[CPUINFO_BUILD_PROP_VALUE_MAX];
    /* "ro.hardware.chipname" */
    char ro_hardware_chipname[CPUINFO_BUILD_PROP_VALUE_MAX];
};
/**
 * Identifies one of the text sources held in struct cpuinfo_android_properties.
 * Values are consecutive starting at 0; `..._max` is the number of sources.
 */
enum cpuinfo_android_chipset_property {
    cpuinfo_android_chipset_property_proc_cpuinfo_hardware = 0,
    cpuinfo_android_chipset_property_ro_product_board,
    cpuinfo_android_chipset_property_ro_board_platform,
    cpuinfo_android_chipset_property_ro_mediatek_platform,
    cpuinfo_android_chipset_property_ro_arch,
    cpuinfo_android_chipset_property_ro_chipname,
    cpuinfo_android_chipset_property_ro_hardware_chipname,
    /* Sentinel: count of the entries above. */
    cpuinfo_android_chipset_property_max,
};
/**
 * ARM chipset (SoC) vendors that can be recognised.
 * `unknown` is 0; `..._max` is a sentinel equal to the number of entries.
 */
enum cpuinfo_arm_chipset_vendor {
    cpuinfo_arm_chipset_vendor_unknown = 0,
    cpuinfo_arm_chipset_vendor_qualcomm,
    cpuinfo_arm_chipset_vendor_mediatek,
    cpuinfo_arm_chipset_vendor_samsung,
    cpuinfo_arm_chipset_vendor_hisilicon,
    cpuinfo_arm_chipset_vendor_actions,
    cpuinfo_arm_chipset_vendor_allwinner,
    cpuinfo_arm_chipset_vendor_amlogic,
    cpuinfo_arm_chipset_vendor_broadcom,
    cpuinfo_arm_chipset_vendor_lg,
    cpuinfo_arm_chipset_vendor_leadcore,
    cpuinfo_arm_chipset_vendor_marvell,
    cpuinfo_arm_chipset_vendor_mstar,
    cpuinfo_arm_chipset_vendor_novathor,
    cpuinfo_arm_chipset_vendor_nvidia,
    cpuinfo_arm_chipset_vendor_pinecone,
    cpuinfo_arm_chipset_vendor_renesas,
    cpuinfo_arm_chipset_vendor_rockchip,
    cpuinfo_arm_chipset_vendor_spreadtrum,
    cpuinfo_arm_chipset_vendor_telechips,
    cpuinfo_arm_chipset_vendor_texas_instruments,
    cpuinfo_arm_chipset_vendor_wondermedia,
    /* Sentinel: count of the entries above. */
    cpuinfo_arm_chipset_vendor_max,
};
/**
 * ARM chipset product series, named `<vendor>_<series>`.
 * `unknown` is 0; `..._max` is a sentinel equal to the number of entries.
 */
enum cpuinfo_arm_chipset_series {
    cpuinfo_arm_chipset_series_unknown = 0,
    /* Qualcomm */
    cpuinfo_arm_chipset_series_qualcomm_qsd,
    cpuinfo_arm_chipset_series_qualcomm_msm,
    cpuinfo_arm_chipset_series_qualcomm_apq,
    cpuinfo_arm_chipset_series_qualcomm_snapdragon,
    /* MediaTek */
    cpuinfo_arm_chipset_series_mediatek_mt,
    /* Samsung */
    cpuinfo_arm_chipset_series_samsung_exynos,
    /* HiSilicon */
    cpuinfo_arm_chipset_series_hisilicon_k3v,
    cpuinfo_arm_chipset_series_hisilicon_hi,
    cpuinfo_arm_chipset_series_hisilicon_kirin,
    /* Other vendors */
    cpuinfo_arm_chipset_series_actions_atm,
    cpuinfo_arm_chipset_series_allwinner_a,
    cpuinfo_arm_chipset_series_amlogic_aml,
    cpuinfo_arm_chipset_series_amlogic_s,
    cpuinfo_arm_chipset_series_broadcom_bcm,
    cpuinfo_arm_chipset_series_lg_nuclun,
    cpuinfo_arm_chipset_series_leadcore_lc,
    cpuinfo_arm_chipset_series_marvell_pxa,
    cpuinfo_arm_chipset_series_mstar_6a,
    cpuinfo_arm_chipset_series_novathor_u,
    /* NVIDIA */
    cpuinfo_arm_chipset_series_nvidia_tegra_t,
    cpuinfo_arm_chipset_series_nvidia_tegra_ap,
    cpuinfo_arm_chipset_series_nvidia_tegra_sl,
    /* Other vendors */
    cpuinfo_arm_chipset_series_pinecone_surge_s,
    cpuinfo_arm_chipset_series_renesas_mp,
    cpuinfo_arm_chipset_series_rockchip_rk,
    cpuinfo_arm_chipset_series_spreadtrum_sc,
    cpuinfo_arm_chipset_series_telechips_tcc,
    cpuinfo_arm_chipset_series_texas_instruments_omap,
    cpuinfo_arm_chipset_series_wondermedia_wm,
    /* Sentinel: count of the entries above. */
    cpuinfo_arm_chipset_series_max,
};
/**
 * Decoded description of an ARM chipset.
 */
struct cpuinfo_arm_chipset {
    /* Chipset vendor (cpuinfo_arm_chipset_vendor_unknown when not recognised). */
    enum cpuinfo_arm_chipset_vendor vendor;
    /* Product series within the vendor's line-up. */
    enum cpuinfo_arm_chipset_series series;
    /* Numeric model; presumably the digits of the marketing name - TODO confirm against the matchers */
    uint32_t model;
    /* Short text suffix; NOTE(review): termination convention is not visible here - confirm */
    char suffix[8];
};
#define BUFFER_SIZE 1024
/**
 * Counts the processors listed in /proc/cpuinfo.
 *
 * Every line that begins with "processor" counts as one CPU.
 *
 * @return number of such lines, or 1 when the file cannot be opened or
 *         contains none (the result is never 0).
 */
static uint32_t getNumberOfCPU() {
    FILE* fp = fopen("/proc/cpuinfo", "rb");
    if (!fp) {
        return 1;
    }
    uint32_t number = 0;
    char buffer[BUFFER_SIZE];
    // fgets() returns NULL on both EOF and error, so no separate feof() test is needed.
    while (fgets(buffer, BUFFER_SIZE, fp) != NULL) {
        // strncmp() stops at the terminator, so a line shorter than the key is
        // never compared against bytes left over from an earlier, longer line.
        if (strncmp(buffer, "processor", 9) == 0) {
            number++;
        }
    }
    fclose(fp);
    return number < 1 ? 1 : number;
}
/**
 * Reads a cpufreq "time_in_state" table and returns the largest frequency in it.
 *
 * Each row is parsed as "<frequency> <time>"; parsing stops at the first row
 * that does not start with an integer.
 *
 * @param path file to read.
 * @return -1 when the file cannot be opened, otherwise the largest frequency
 *         found (0 when no row could be parsed).
 */
static int readMaxFreqFromTimeInState(const char* path) {
    FILE* fp = fopen(path, "rb");
    if (!fp) {
        return -1;
    }
    int maxfrequency = 0;
    int frequency = 0;
    while (fscanf(fp, "%d %*d", &frequency) == 1) {
        if (frequency > maxfrequency) {
            maxfrequency = frequency;
        }
    }
    fclose(fp);
    return maxfrequency;
}

/**
 * Returns the maximum frequency reported by sysfs for one CPU.
 *
 * Sources are tried in this order, and the first positive value wins:
 *   1. /sys/devices/system/cpu/cpufreq/stats/cpu<N>/time_in_state
 *   2. /sys/devices/system/cpu/cpu<N>/cpufreq/stats/time_in_state
 *   3. /sys/devices/system/cpu/cpu<N>/cpufreq/cpuinfo_max_freq
 *
 * A stats file that exists but holds no usable row no longer ends the search:
 * the remaining sources are still consulted.
 *
 * @param cpuID index of the CPU to query.
 * @return the frequency as written in sysfs (kHz, going by the function name);
 *         0 when a stats file existed but no source gave a positive value;
 *         -1 when nothing could be read at all.
 */
static int getCPUMaxFreqKHz(int cpuID) {
    char path[256];
    int statsFileSeen = 0;

    snprintf(path, sizeof(path), "/sys/devices/system/cpu/cpufreq/stats/cpu%d/time_in_state", cpuID);
    int frequency = readMaxFreqFromTimeInState(path);
    if (frequency > 0) {
        return frequency;
    }
    statsFileSeen |= (frequency == 0);

    snprintf(path, sizeof(path), "/sys/devices/system/cpu/cpu%d/cpufreq/stats/time_in_state", cpuID);
    frequency = readMaxFreqFromTimeInState(path);
    if (frequency > 0) {
        return frequency;
    }
    statsFileSeen |= (frequency == 0);

    snprintf(path, sizeof(path), "/sys/devices/system/cpu/cpu%d/cpufreq/cpuinfo_max_freq", cpuID);
    FILE* fp = fopen(path, "rb");
    if (!fp) {
        // Keep the historical result (0) when an empty stats file was found.
        return statsFileSeen ? 0 : -1;
    }
    int maxfrequency = -1;
    if (fscanf(fp, "%d", &maxfrequency) != 1) {
        maxfrequency = -1;
    }
    fclose(fp);
    if (maxfrequency > 0) {
        return maxfrequency;
    }
    return statsFileSeen ? 0 : maxfrequency;
}
/**
 * Orders CPU indices from fastest to slowest and locates the little cluster.
 *
 * Only the size of `cpuIDs` is used on entry: its contents are overwritten
 * with the indices 0..size-1, then sorted by descending getCPUMaxFreqKHz().
 *
 * @param cpuIDs               in: sized to the CPU count; out: indices, fastest first.
 * @param littleClusterOffset  out: position in `cpuIDs` of the first CPU whose
 *                             frequency is below the midpoint of the fastest
 *                             and slowest frequency; 0 when no split is found.
 * @return always 0.
 */
static int sortCPUIDByMaxFrequency(std::vector<int>& cpuIDs, int* littleClusterOffset) {
    const int count = cpuIDs.size();
    *littleClusterOffset = 0;
    if (count == 0) {
        return 0;
    }

    std::vector<int> maxFreqs(count);
    for (int cpu = 0; cpu < count; ++cpu) {
        maxFreqs[cpu] = getCPUMaxFreqKHz(cpu);
        cpuIDs[cpu] = cpu;
        // MNN_PRINT("cpu fre: %d, %d\n", cpu, maxFreqs[cpu]);
    }

    // Exchange sort, descending by frequency; ids and frequencies move together.
    for (int head = 0; head < count; ++head) {
        for (int probe = head + 1; probe < count; ++probe) {
            if (maxFreqs[head] < maxFreqs[probe]) {
                std::swap(cpuIDs[head], cpuIDs[probe]);
                std::swap(maxFreqs[head], maxFreqs[probe]);
            }
        }
    }

    const int slowest = maxFreqs.back();
    const int threshold = (maxFreqs.front() + slowest) / 2;
    if (threshold == slowest) {
        // Fastest and slowest are (almost) equal: no separate clusters.
        return 0;
    }
    for (int pos = 0; pos < count; ++pos) {
        if (maxFreqs[pos] < threshold) {
            *littleClusterOffset = pos;
            break;
        }
    }
    return 0;
}
static int setSchedAffinity(const std::vector<int>& cpuIDs) {
#define CPU_SETSIZE 1024
#define __NCPUBITS (8 * sizeof(unsigned long))
typedef struct {
unsigned long __bits[CPU_SETSIZE / __NCPUBITS];
} cpu_set_t;
#define CPU_SET(cpu, cpusetp) ((cpusetp)->__bits[(cpu) / __NCPUBITS] |= (1UL << ((cpu) % __NCPUBITS)))
#define CPU_ZERO(cpusetp) memset((cpusetp), 0, sizeof(cpu_set_t))
// set affinity for thread
#ifdef __GLIBC__
pid_t pid = syscall(SYS_gettid);
#else
#ifdef PI3
pid_t pid = getpid();
#else
pid_t pid = gettid();
#endif
#endif
cpu_set_t mask;
CPU_ZERO(&mask);
for (int i = 0; i < (int)cpuIDs.size(); i++) {
CPU_SET(cpuIDs[i], &mask);
}
int syscallret = syscall(__NR_sched_setaffinity, pid, sizeof(mask), &mask);
if (syscallret) {
MNN_PRINT("syscall error %d\n", syscallret);
return -1;
}
return 0;
}
#endif // arch
/**
 * Binds the calling thread (or every OpenMP worker) to a CPU cluster.
 *
 * Android only. The CPU list, sorted fastest first, and the big/little
 * boundary are computed on the first non-default call and cached in
 * function-local statics.
 *
 *  - MNN_CPU_MODE_DEFAULT   : nothing is changed.
 *  - MNN_CPU_MODE_POWER_FRI : all CPUs.
 *  - MNN_CPU_MODE_LITTLE    : CPUs from the little-cluster boundary onwards.
 *  - MNN_CPU_MODE_BIG       : CPUs before the little-cluster boundary.
 *
 * When the CPU has no distinguishable clusters the BIG set is empty. That
 * case is reported as a failure instead of configuring OpenMP with zero
 * threads or issuing an affinity call with an empty mask.
 *
 * @param mode requested thread mode.
 * @return 0 on success (or for the default mode), -1 on failure, when the
 *         selected CPU set is empty, or on platforms other than Android.
 */
int MNNSetCPUThreadsMode(MNNCPUThreadsMode mode) {
#ifdef __ANDROID__
    if (mode == MNN_CPU_MODE_DEFAULT) {
        return 0;
    }
    static std::vector<int> sortedCPUIDs;
    static int littleClusterOffset = 0;
    if (sortedCPUIDs.empty()) {
        // /proc/cpuinfo only needs to be scanned once, when the cache is built.
        const int numberOfCPUs = (int)getNumberOfCPU();
        sortedCPUIDs.resize(numberOfCPUs);
        for (int i = 0; i < numberOfCPUs; ++i) {
            sortedCPUIDs[i] = i;
        }
        sortCPUIDByMaxFrequency(sortedCPUIDs, &littleClusterOffset);
    }

    if (littleClusterOffset == 0 && mode != MNN_CPU_MODE_POWER_FRI) {
        MNN_PRINT("This CPU Arch Do NOT support for setting cpu thread mode\n");
    }

    std::vector<int> cpuAttachIDs;
    switch (mode) {
        case MNN_CPU_MODE_POWER_FRI:
            cpuAttachIDs = sortedCPUIDs;
            break;
        case MNN_CPU_MODE_LITTLE:
            cpuAttachIDs = std::vector<int>(sortedCPUIDs.begin() + littleClusterOffset, sortedCPUIDs.end());
            break;
        case MNN_CPU_MODE_BIG:
            cpuAttachIDs = std::vector<int>(sortedCPUIDs.begin(), sortedCPUIDs.begin() + littleClusterOffset);
            break;
        default:
            cpuAttachIDs = sortedCPUIDs;
            break;
    }

    // An empty set cannot be bound to: omp_set_num_threads() requires a
    // positive value and sched_setaffinity() rejects an empty mask.
    if (cpuAttachIDs.empty()) {
        return -1;
    }

#ifdef _OPENMP
    const int threadsNumber = cpuAttachIDs.size();
    omp_set_num_threads(threadsNumber);
    std::vector<int> result(threadsNumber, 0);
    // One iteration per worker so that each thread sets its own affinity.
#pragma omp parallel for
    for (int i = 0; i < threadsNumber; ++i) {
        result[i] = setSchedAffinity(cpuAttachIDs);
    }
    for (int i = 0; i < threadsNumber; ++i) {
        if (result[i] != 0) {
            return -1;
        }
    }
#else
    int res = setSchedAffinity(cpuAttachIDs);
    if (res != 0) {
        return -1;
    }
#endif // _OPENMP
    return 0;
#elif __IOS__
    return -1;
#else
    return -1;
#endif // arch
}
/**
 * Estimates the compute capability of the `number` fastest CPUs.
 *
 * On Android the result is the sum of the maximum frequencies of the
 * `number` fastest CPUs, each divided by 1024. CPUs whose frequency cannot
 * be read (reported as -1 or 0) are left out of the sum instead of being
 * subtracted from it. When none of the selected CPUs has a readable
 * frequency the default of 2048 is returned, so the result is never negative.
 *
 * On other platforms the default of 2048 is always returned.
 *
 * @param number how many CPUs to take into account (clamped to the CPU count).
 * @return the estimate described above; 0 on Android when `number` is 0.
 */
float MNNGetCPUFlops(uint32_t number) {
    float flops = 2048.0f;
#ifdef __ANDROID__
    auto numberOfCPUs = getNumberOfCPU();
    if (0 == numberOfCPUs) {
        return flops;
    }
    std::vector<int> freqs(numberOfCPUs);
    for (uint32_t i = 0; i < numberOfCPUs; ++i) {
        freqs[i] = getCPUMaxFreqKHz((int)i);
    }
    // Sorting through reverse iterators leaves the vector in descending order.
    std::sort(freqs.rbegin(), freqs.rend());
    number = std::min(number, numberOfCPUs);

    float total = 0.0f;
    uint32_t readable = 0;
    for (uint32_t i = 0; i < number; ++i) {
        if (freqs[i] > 0) {
            total += (float)freqs[i] / 1024.0f;
            ++readable;
        }
    }
    if (number > 0 && readable == 0) {
        // Nothing could be read from sysfs: keep the default estimate.
        return flops;
    }
    flops = total;
#endif
    return flops;
}
// cpuinfo
// Reference from: https://github.com/pytorch/cpuinfo
#ifdef __ANDROID__
2020-07-04 01:21:30 +08:00
/* Bit masks of the fields packed into the ARM Main ID Register (MIDR) value. */
#define CPUINFO_ARM_MIDR_IMPLEMENTER_MASK UINT32_C(0xFF000000)
#define CPUINFO_ARM_MIDR_VARIANT_MASK UINT32_C(0x00F00000)
#define CPUINFO_ARM_MIDR_ARCHITECTURE_MASK UINT32_C(0x000F0000)
#define CPUINFO_ARM_MIDR_PART_MASK UINT32_C(0x0000FFF0)
#define CPUINFO_ARM_MIDR_REVISION_MASK UINT32_C(0x0000000F)

/* Shift of the lowest bit of each MIDR field. */
#define CPUINFO_ARM_MIDR_IMPLEMENTER_OFFSET 24
#define CPUINFO_ARM_MIDR_VARIANT_OFFSET 20
#define CPUINFO_ARM_MIDR_ARCHITECTURE_OFFSET 16
#define CPUINFO_ARM_MIDR_PART_OFFSET 4
#define CPUINFO_ARM_MIDR_REVISION_OFFSET 0

/*
 * Bits of cpuinfo_arm_linux_processor::flags recording which pieces of
 * information were parsed for a processor.
 * CPUINFO_ARM_LINUX_VALID_MIDR is the union of the six VALID_* bits below it.
 */
#define CPUINFO_LINUX_FLAG_VALID UINT32_C(0x00001000)
#define CPUINFO_ARM_LINUX_VALID_MIDR UINT32_C(0x003F0000)
#define CPUINFO_ARM_LINUX_VALID_ARCHITECTURE UINT32_C(0x00010000)
#define CPUINFO_ARM_LINUX_VALID_IMPLEMENTER UINT32_C(0x00020000)
#define CPUINFO_ARM_LINUX_VALID_VARIANT UINT32_C(0x00040000)
#define CPUINFO_ARM_LINUX_VALID_PART UINT32_C(0x00080000)
#define CPUINFO_ARM_LINUX_VALID_REVISION UINT32_C(0x00100000)
#define CPUINFO_ARM_LINUX_VALID_PROCESSOR UINT32_C(0x00200000)

/* Kernel hardware-capability bits used for feature detection. */
#ifdef __aarch64__
#define CPUINFO_ARM_LINUX_FEATURE_FPHP UINT32_C(0x00000200)
#define CPUINFO_ARM_LINUX_FEATURE_ASIMDHP UINT32_C(0x00000400)
#define CPUINFO_ARM_LINUX_FEATURE_ASIMDDP UINT32_C(0x00100000)
// ref: https://cs.android.com/android/platform/superproject/+/master:bionic/libc/kernel/uapi/asm-arm64/asm/hwcap.h;drc=04da58f5b3bc40dbbafb4f8422aa2991479d9e1e;l=70
// NOTE(review): I8MM and SVE2 look like HWCAP2 bits in that header, the others HWCAP bits -
// confirm each is tested against the matching auxiliary vector entry.
#define CPUINFO_ARM_LINUX_FEATURE_I8MM UINT32_C(0x00002000)
#define CPUINFO_ARM_LINUX_FEATURE_SVE UINT32_C(0x00400000)
#define CPUINFO_ARM_LINUX_FEATURE_SVE2 UINT32_C(0x00000002)
#else
#define CPUINFO_ARM_LINUX_FEATURE_HALF UINT32_C(0x00000002)
#define CPUINFO_ARM_LINUX_FEATURE_NEON UINT32_C(0x00001000)
#endif
/**
 * Information collected for one logical processor from /proc/cpuinfo.
 */
struct cpuinfo_arm_linux_processor {
    /* ARM architecture version parsed from the "CPU architecture" line. */
    uint32_t architecture_version;
    // Main ID Register value
    uint32_t midr;
    /* NOTE(review): units and producer of the two frequency fields are not visible here - confirm */
    uint32_t max_frequency;
    uint32_t min_frequency;
    /* presumably the kernel's processor index - TODO confirm against the code that fills it */
    uint32_t system_processor_id;
    /* Combination of CPUINFO_ARM_LINUX_VALID_* bits set by the parsers. */
    uint32_t flags;
};
/**
 * Mutable state carried between the lines of /proc/cpuinfo while parsing.
 */
struct proc_cpuinfo_parser_state {
    /* Destination buffer for the value of the "Hardware" line. */
    char* hardware;
    /* Index taken from the most recent "processor" line. */
    uint32_t processor_index;
    /* Number of entries in `processors`. */
    uint32_t max_processors_count;
    /* Per-processor records, indexed by processor number. */
    struct cpuinfo_arm_linux_processor* processors;
    /* Receives parsed fields when processor_index is outside `processors`. */
    struct cpuinfo_arm_linux_processor dummy_processor;
};
/*
 * Handler invoked for each line of a parsed file: receives the line as a
 * [line_start, line_end) range, the caller's context pointer and the line
 * number. Returning false stops the parse.
 */
typedef bool (*cpuinfo_line_callback)(const char* line_start, const char* line_end, void* context,
                                      uint64_t line_number);
inline static uint32_t midr_set_implementer(uint32_t midr, uint32_t implementer) {
return (midr & ~CPUINFO_ARM_MIDR_IMPLEMENTER_MASK) |
2020-07-04 01:21:30 +08:00
((implementer << CPUINFO_ARM_MIDR_IMPLEMENTER_OFFSET) & CPUINFO_ARM_MIDR_IMPLEMENTER_MASK);
}
inline static uint32_t midr_set_architecture(uint32_t midr, uint32_t architecture) {
return (midr & ~CPUINFO_ARM_MIDR_ARCHITECTURE_MASK) |
2020-07-04 01:21:30 +08:00
((architecture << CPUINFO_ARM_MIDR_ARCHITECTURE_OFFSET) & CPUINFO_ARM_MIDR_ARCHITECTURE_MASK);
}
inline static uint32_t midr_set_part(uint32_t midr, uint32_t part) {
2020-07-04 01:21:30 +08:00
return (midr & ~CPUINFO_ARM_MIDR_PART_MASK) | ((part << CPUINFO_ARM_MIDR_PART_OFFSET) & CPUINFO_ARM_MIDR_PART_MASK);
}
inline static uint32_t midr_set_revision(uint32_t midr, uint32_t revision) {
return (midr & ~CPUINFO_ARM_MIDR_REVISION_MASK) |
2020-07-04 01:21:30 +08:00
((revision << CPUINFO_ARM_MIDR_REVISION_OFFSET) & CPUINFO_ARM_MIDR_REVISION_MASK);
}
inline static uint32_t midr_set_variant(uint32_t midr, uint32_t variant) {
return (midr & ~CPUINFO_ARM_MIDR_VARIANT_MASK) |
2020-07-04 01:21:30 +08:00
((variant << CPUINFO_ARM_MIDR_VARIANT_OFFSET) & CPUINFO_ARM_MIDR_VARIANT_MASK);
}
2021-04-08 15:34:23 +08:00
/* Extracts the variant field from a MIDR value. */
inline static uint32_t midr_get_variant(uint32_t midr) {
    const uint32_t field = midr & CPUINFO_ARM_MIDR_VARIANT_MASK;
    return field >> CPUINFO_ARM_MIDR_VARIANT_OFFSET;
}
/* True when every bit set in `mask` is also set in `bitfield` (always true for an empty mask). */
static inline bool bitmask_all(uint32_t bitfield, uint32_t mask) {
    const uint32_t missing = mask & ~bitfield;
    return missing == 0;
}
2020-07-04 01:21:30 +08:00
static void parse_cpu_part(const char* cpu_part_start, const char* cpu_part_end,
struct cpuinfo_arm_linux_processor* processor) {
const size_t cpu_part_length = (size_t)(cpu_part_end - cpu_part_start);
/*
* CPU part should contain hex prefix (0x) and one to three hex digits.
* I have never seen less than three digits as a value of this field,
* but I don't think it is impossible to see such values in future.
* Value can not contain more than three hex digits since
* Main ID Register (MIDR) assigns only a 12-bit value for CPU part.
*/
if (cpu_part_length < 3 || cpu_part_length > 5) {
2020-07-04 01:21:30 +08:00
MNN_PRINT("CPU part %.*s in /proc/cpuinfo is ignored due to unexpected length (%zu)\n", (int)cpu_part_length,
cpu_part_start, cpu_part_length);
return;
}
/* Verify the presence of hex prefix */
if (cpu_part_start[0] != '0' || cpu_part_start[1] != 'x') {
2020-07-04 01:21:30 +08:00
MNN_PRINT("CPU part %.*s in /proc/cpuinfo is ignored due to lack of 0x prefix\n", (int)cpu_part_length,
cpu_part_start);
return;
}
/* Verify that characters after hex prefix are hexadecimal digits and decode them */
uint32_t cpu_part = 0;
for (const char* digit_ptr = cpu_part_start + 2; digit_ptr != cpu_part_end; digit_ptr++) {
const char digit_char = *digit_ptr;
uint32_t digit;
if (digit_char >= '0' && digit_char <= '9') {
digit = digit_char - '0';
2020-07-04 01:21:30 +08:00
} else if ((uint32_t)(digit_char - 'A') < 6) {
digit = 10 + (digit_char - 'A');
2020-07-04 01:21:30 +08:00
} else if ((uint32_t)(digit_char - 'a') < 6) {
digit = 10 + (digit_char - 'a');
} else {
2020-07-04 01:21:30 +08:00
MNN_PRINT("CPU part %.*s in /proc/cpuinfo is ignored due to unexpected non-hex character %c at offset %zu\n",
(int)cpu_part_length, cpu_part_start, digit_char, (size_t)(digit_ptr - cpu_part_start));
return;
}
cpu_part = cpu_part * 16 + digit;
}
processor->midr = midr_set_part(processor->midr, cpu_part);
processor->flags |= CPUINFO_ARM_LINUX_VALID_PART | CPUINFO_ARM_LINUX_VALID_PROCESSOR;
}
2020-07-04 01:21:30 +08:00
static void parse_cpu_revision(const char* cpu_revision_start, const char* cpu_revision_end,
struct cpuinfo_arm_linux_processor* processor) {
uint32_t cpu_revision = 0;
for (const char* digit_ptr = cpu_revision_start; digit_ptr != cpu_revision_end; digit_ptr++) {
2020-07-04 01:21:30 +08:00
const uint32_t digit = (uint32_t)(*digit_ptr - '0');
/* Verify that the character in CPU revision is a decimal digit */
if (digit >= 10) {
2020-07-04 01:21:30 +08:00
MNN_PRINT(
"CPU revision %.*s in /proc/cpuinfo is ignored due to unexpected non-digit character '%c' at offset "
"%zu\n",
(int)(cpu_revision_end - cpu_revision_start), cpu_revision_start, *digit_ptr,
(size_t)(digit_ptr - cpu_revision_start));
return;
}
cpu_revision = cpu_revision * 10 + digit;
}
processor->midr = midr_set_revision(processor->midr, cpu_revision);
processor->flags |= CPUINFO_ARM_LINUX_VALID_REVISION | CPUINFO_ARM_LINUX_VALID_PROCESSOR;
}
2020-07-04 01:21:30 +08:00
static void parse_cpu_architecture(const char* cpu_architecture_start, const char* cpu_architecture_end,
struct cpuinfo_arm_linux_processor* processor) {
const size_t cpu_architecture_length = (size_t)(cpu_architecture_end - cpu_architecture_start);
/* Early AArch64 kernels report "CPU architecture: AArch64" instead of a numeric value 8 */
if (cpu_architecture_length == 7) {
if (memcmp(cpu_architecture_start, "AArch64", cpu_architecture_length) == 0) {
2020-07-04 01:21:30 +08:00
processor->midr = midr_set_architecture(processor->midr, UINT32_C(0xF));
processor->architecture_version = 8;
processor->flags |= CPUINFO_ARM_LINUX_VALID_ARCHITECTURE | CPUINFO_ARM_LINUX_VALID_PROCESSOR;
return;
}
}
2020-07-04 01:21:30 +08:00
uint32_t architecture = 0;
const char* cpu_architecture_ptr = cpu_architecture_start;
for (; cpu_architecture_ptr != cpu_architecture_end; cpu_architecture_ptr++) {
const uint32_t digit = (*cpu_architecture_ptr) - '0';
/* Verify that CPU architecture is a decimal number */
if (digit >= 10) {
break;
}
architecture = architecture * 10 + digit;
}
if (cpu_architecture_ptr == cpu_architecture_start) {
2020-07-04 01:21:30 +08:00
MNN_PRINT("CPU architecture %.*s in /proc/cpuinfo is ignored due to non-digit at the beginning of the string\n",
(int)cpu_architecture_length, cpu_architecture_start);
} else {
if (architecture != 0) {
processor->architecture_version = architecture;
processor->flags |= CPUINFO_ARM_LINUX_VALID_ARCHITECTURE | CPUINFO_ARM_LINUX_VALID_PROCESSOR;
for (; cpu_architecture_ptr != cpu_architecture_end; cpu_architecture_ptr++) {
const char feature = *cpu_architecture_ptr;
switch (feature) {
case ' ':
case '\t':
/* Ignore whitespace at the end */
break;
default:
2020-07-04 01:21:30 +08:00
MNN_PRINT("skipped unknown architectural feature '%c' for ARMv%u\n", feature, architecture);
break;
}
}
} else {
2020-07-04 01:21:30 +08:00
MNN_PRINT("CPU architecture %.*s in /proc/cpuinfo is ignored due to invalid value (0)\n",
(int)cpu_architecture_length, cpu_architecture_start);
}
}
uint32_t midr_architecture = UINT32_C(0xF);
2020-07-04 01:21:30 +08:00
processor->midr = midr_set_architecture(processor->midr, midr_architecture);
}
2020-07-04 01:21:30 +08:00
static uint32_t parse_processor_number(const char* processor_start, const char* processor_end) {
const size_t processor_length = (size_t)(processor_end - processor_start);
if (processor_length == 0) {
2020-07-04 01:21:30 +08:00
MNN_PRINT("Processor number in /proc/cpuinfo is ignored: string is empty\n");
return 0;
}
uint32_t processor_number = 0;
for (const char* digit_ptr = processor_start; digit_ptr != processor_end; digit_ptr++) {
2020-07-04 01:21:30 +08:00
const uint32_t digit = (uint32_t)(*digit_ptr - '0');
if (digit > 10) {
2020-07-04 01:21:30 +08:00
MNN_PRINT("non-decimal suffix %.*s in /proc/cpuinfo processor number is ignored\n",
(int)(processor_end - digit_ptr), digit_ptr);
break;
}
processor_number = processor_number * 10 + digit;
}
return processor_number;
}
2020-07-04 01:21:30 +08:00
static void parse_cpu_variant(const char* cpu_variant_start, const char* cpu_variant_end,
struct cpuinfo_arm_linux_processor* processor) {
const size_t cpu_variant_length = cpu_variant_end - cpu_variant_start;
/*
* Value should contain hex prefix (0x) and one hex digit.
* Value can not contain more than one hex digits since
* Main ID Register (MIDR) assigns only a 4-bit value for CPU variant.
*/
if (cpu_variant_length != 3) {
2020-07-04 01:21:30 +08:00
MNN_PRINT("CPU variant %.*s in /proc/cpuinfo is ignored due to unexpected length (%zu)\n",
(int)cpu_variant_length, cpu_variant_start, cpu_variant_length);
return;
}
/* Skip if there is no hex prefix (0x) */
if (cpu_variant_start[0] != '0' || cpu_variant_start[1] != 'x') {
2020-07-04 01:21:30 +08:00
MNN_PRINT("CPU variant %.*s in /proc/cpuinfo is ignored due to lack of 0x prefix\n", (int)cpu_variant_length,
cpu_variant_start);
return;
}
/* Check if the value after hex prefix is indeed a hex digit and decode it. */
const char digit_char = cpu_variant_start[2];
uint32_t cpu_variant;
2020-07-04 01:21:30 +08:00
if ((uint32_t)(digit_char - '0') < 10) {
cpu_variant = (uint32_t)(digit_char - '0');
} else if ((uint32_t)(digit_char - 'A') < 6) {
cpu_variant = 10 + (uint32_t)(digit_char - 'A');
} else if ((uint32_t)(digit_char - 'a') < 6) {
cpu_variant = 10 + (uint32_t)(digit_char - 'a');
} else {
2020-07-04 01:21:30 +08:00
MNN_PRINT("CPU variant %.*s in /proc/cpuinfo is ignored due to unexpected non-hex character '%c'\n",
(int)cpu_variant_length, cpu_variant_start, digit_char);
return;
}
processor->midr = midr_set_variant(processor->midr, cpu_variant);
processor->flags |= CPUINFO_ARM_LINUX_VALID_VARIANT | CPUINFO_ARM_LINUX_VALID_PROCESSOR;
}
2020-07-04 01:21:30 +08:00
static void parse_cpu_implementer(const char* cpu_implementer_start, const char* cpu_implementer_end,
struct cpuinfo_arm_linux_processor* processor) {
const size_t cpu_implementer_length = cpu_implementer_end - cpu_implementer_start;
/*
* Value should contain hex prefix (0x) and one or two hex digits.
* I have never seen single hex digit as a value of this field,
* but I don't think it is impossible in future.
* Value can not contain more than two hex digits since
* Main ID Register (MIDR) assigns only an 8-bit value for CPU implementer.
*/
switch (cpu_implementer_length) {
case 3:
case 4:
break;
default:
2020-07-04 01:21:30 +08:00
MNN_PRINT("CPU implementer %.*s in /proc/cpuinfo is ignored due to unexpected length (%zu)\n",
(int)cpu_implementer_length, cpu_implementer_start, cpu_implementer_length);
return;
}
/* Verify the presence of hex prefix */
if (cpu_implementer_start[0] != '0' || cpu_implementer_start[1] != 'x') {
2020-07-04 01:21:30 +08:00
MNN_PRINT("CPU implementer %.*s in /proc/cpuinfo is ignored due to lack of 0x prefix\n",
(int)cpu_implementer_length, cpu_implementer_start);
return;
}
/* Verify that characters after hex prefix are hexadecimal digits and decode them */
uint32_t cpu_implementer = 0;
for (const char* digit_ptr = cpu_implementer_start + 2; digit_ptr != cpu_implementer_end; digit_ptr++) {
const char digit_char = *digit_ptr;
uint32_t digit;
if (digit_char >= '0' && digit_char <= '9') {
digit = digit_char - '0';
2020-07-04 01:21:30 +08:00
} else if ((uint32_t)(digit_char - 'A') < 6) {
digit = 10 + (digit_char - 'A');
2020-07-04 01:21:30 +08:00
} else if ((uint32_t)(digit_char - 'a') < 6) {
digit = 10 + (digit_char - 'a');
} else {
2020-07-04 01:21:30 +08:00
MNN_PRINT(
"CPU implementer %.*s in /proc/cpuinfo is ignored due to unexpected non-hex character '%c' at offset "
"%zu\n",
(int)cpu_implementer_length, cpu_implementer_start, digit_char,
(size_t)(digit_ptr - cpu_implementer_start));
return;
}
cpu_implementer = cpu_implementer * 16 + digit;
}
processor->midr = midr_set_implementer(processor->midr, cpu_implementer);
processor->flags |= CPUINFO_ARM_LINUX_VALID_IMPLEMENTER | CPUINFO_ARM_LINUX_VALID_PROCESSOR;
}
2020-07-04 01:21:30 +08:00
static bool parse_line(const char* line_start, const char* line_end, struct proc_cpuinfo_parser_state* state,
uint64_t line_number) {
/* Empty line. Skip. */
if (line_start == line_end) {
return true;
}
2020-07-04 01:21:30 +08:00
/* Search for ':' on the line. */
const char* separator = line_start;
for (; separator != line_end; separator++) {
if (*separator == ':') {
break;
}
}
/* Skip line if no ':' separator was found. */
if (separator == line_end) {
2020-07-04 01:21:30 +08:00
MNN_PRINT("Line %.*s in /proc/cpuinfo is ignored: key/value separator ':' not found\n",
(int)(line_end - line_start), line_start);
return true;
}
/* Skip trailing spaces in key part. */
const char* key_end = separator;
for (; key_end != line_start; key_end--) {
if (key_end[-1] != ' ' && key_end[-1] != '\t') {
break;
}
}
/* Skip line if key contains nothing but spaces. */
if (key_end == line_start) {
2020-07-04 01:21:30 +08:00
MNN_PRINT("Line %.*s in /proc/cpuinfo is ignored: key contains only spaces\n", (int)(line_end - line_start),
line_start);
return true;
}
/* Skip leading spaces in value part. */
const char* value_start = separator + 1;
for (; value_start != line_end; value_start++) {
if (*value_start != ' ') {
break;
}
}
/* Value part contains nothing but spaces. Skip line. */
if (value_start == line_end) {
2020-07-04 01:21:30 +08:00
MNN_PRINT("Line %.*s in /proc/cpuinfo is ignored: value contains only spaces\n", (int)(line_end - line_start),
line_start);
return true;
}
/* Skip trailing spaces in value part (if any) */
const char* value_end = line_end;
for (; value_end != value_start; value_end--) {
if (value_end[-1] != ' ') {
break;
}
}
2020-07-04 01:21:30 +08:00
const uint32_t processor_index = state->processor_index;
const uint32_t max_processors_count = state->max_processors_count;
struct cpuinfo_arm_linux_processor* processors = state->processors;
struct cpuinfo_arm_linux_processor* processor = &state->dummy_processor;
if (processor_index < max_processors_count) {
processor = &processors[processor_index];
}
const size_t key_length = key_end - line_start;
switch (key_length) {
case 6:
break;
case 8:
if (memcmp(line_start, "CPU part", key_length) == 0) {
parse_cpu_part(value_start, value_end, processor);
} else if (memcmp(line_start, "Features", key_length) == 0) {
/* parse_features(value_start, value_end, processor); */
} else if (memcmp(line_start, "BogoMIPS", key_length) == 0) {
/* BogoMIPS is useless, don't parse */
} else if (memcmp(line_start, "Hardware", key_length) == 0) {
size_t value_length = value_end - value_start;
if (value_length > CPUINFO_HARDWARE_VALUE_MAX) {
MNN_PRINT(
2020-07-04 01:21:30 +08:00
"length of Hardware value \"%.*s\" in /proc/cpuinfo exceeds limit (%d): truncating to the "
"limit\n",
(int)value_length, value_start, CPUINFO_HARDWARE_VALUE_MAX);
value_length = CPUINFO_HARDWARE_VALUE_MAX;
} else {
state->hardware[value_length] = '\0';
}
memcpy(state->hardware, value_start, value_length);
2020-07-04 01:21:30 +08:00
MNN_PRINT("parsed /proc/cpuinfo Hardware = \"%.*s\"\n", (int)value_length, value_start);
} else if (memcmp(line_start, "Revision", key_length) == 0) {
/* Board revision, no use for now */
}
break;
case 9:
if (memcmp(line_start, "processor", key_length) == 0) {
const uint32_t new_processor_index = parse_processor_number(value_start, value_end);
if (new_processor_index < processor_index) {
/* Strange: decreasing processor number */
2020-07-04 01:21:30 +08:00
MNN_PRINT("unexpectedly low processor number %u following processor %u in /proc/cpuinfo\n",
new_processor_index, processor_index);
} else if (new_processor_index > processor_index + 1) {
/* Strange, but common: skipped processor $(processor_index + 1) */
2020-07-04 01:21:30 +08:00
MNN_PRINT("unexpectedly high processor number %u following processor %u in /proc/cpuinfo\n",
new_processor_index, processor_index);
}
if (new_processor_index < max_processors_count) {
/* Record that the processor was mentioned in /proc/cpuinfo */
processors[new_processor_index].flags |= CPUINFO_ARM_LINUX_VALID_PROCESSOR;
} else {
/* Log and ignore processor */
MNN_PRINT("processor %u in /proc/cpuinfo is ignored: index exceeds system limit %u\n",
2020-07-04 01:21:30 +08:00
new_processor_index, max_processors_count - 1);
}
state->processor_index = new_processor_index;
return true;
} else if (memcmp(line_start, "Processor", key_length) == 0) {
/* TODO: parse to fix misreported architecture, similar to Android's cpufeatures */
}
break;
case 11:
if (memcmp(line_start, "CPU variant", key_length) == 0) {
parse_cpu_variant(value_start, value_end, processor);
}
break;
case 12:
if (memcmp(line_start, "CPU revision", key_length) == 0) {
parse_cpu_revision(value_start, value_end, processor);
}
break;
case 15:
if (memcmp(line_start, "CPU implementer", key_length) == 0) {
parse_cpu_implementer(value_start, value_end, processor);
} else if (memcmp(line_start, "CPU implementor", key_length) == 0) {
parse_cpu_implementer(value_start, value_end, processor);
}
break;
case 16:
if (memcmp(line_start, "CPU architecture", key_length) == 0) {
parse_cpu_architecture(value_start, value_end, processor);
}
break;
default:
break;
}
return true;
}
2020-07-04 01:21:30 +08:00
/**
 * Reads a text file in chunks and calls `callback` once per line.
 *
 * Lines are delimited by '\n' and passed without the delimiter. Whatever
 * remains after the last newline is passed as a final line, which may be
 * empty.
 *
 * A line that does not fit in `buffer_size` bytes is delivered truncated to
 * the first `buffer_size` bytes and the rest of it is discarded, after which
 * parsing continues with the following line. Previously such a line ended
 * the parse silently, because the zero-length read was taken for end of file.
 *
 * @param filename    path of the file to read.
 * @param buffer_size size of the working buffer, i.e. the longest line that
 *                    is delivered intact.
 * @param callback    invoked as callback(line_start, line_end, context,
 *                    line_number); returning false aborts the parse.
 * @param context     opaque pointer passed through to `callback`.
 * @return true when the whole file was processed; false when the file could
 *         not be opened or read, or when the callback returned false.
 */
bool cpuinfo_linux_parse_multiline_file(const char* filename, size_t buffer_size, cpuinfo_line_callback callback,
                                        void* context) {
/*
 * Error exit: closes the file and returns false. It expands to several
 * statements, so it must only be used inside braces.
 * NOTE(review): the macro stays defined after this function, as before -
 * confirm nothing later relies on it before scoping it with #undef.
 */
#define RETIEMENT     \
    if (file != -1) { \
        close(file);  \
        file = -1;    \
    }                 \
    return false;

    int file = -1;
    /* Heap storage keeps a large caller-chosen size from exhausting the stack. */
    std::vector<char> storage(buffer_size);
    char* buffer = storage.data();

    file = open(filename, O_RDONLY);
    if (file == -1) {
        MNN_PRINT("failed to open %s\n", filename);
        RETIEMENT
    }

    /* Only used for error reporting */
    size_t position = 0;
    uint64_t line_number = 1;
    const char* buffer_end = buffer + buffer_size;
    /* Where the next read() appends; bytes before it are an unfinished line. */
    char* data_start = buffer;
    /* Set while the tail of an over-long line is being skipped. */
    bool discarding = false;

    for (;;) {
        const ssize_t bytes_read = read(file, data_start, (size_t)(buffer_end - data_start));
        if (bytes_read < 0) {
            MNN_PRINT("failed to read file %s at position %zu\n", filename, position);
            RETIEMENT
        }
        position += (size_t)bytes_read;
        const char* data_end = data_start + (size_t)bytes_read;

        if (bytes_read == 0) {
            /* No more data in the file: process the remaining text in the buffer as a single entry */
            if (!callback(buffer, data_end, context, line_number)) {
                RETIEMENT
            }
            break;
        }

        /* Hand over every complete line currently in the buffer */
        const char* line_start = buffer;
        for (;;) {
            const char* line_end = (const char*)memchr(line_start, '\n', (size_t)(data_end - line_start));
            if (line_end == NULL) {
                break;
            }
            if (discarding) {
                /* End of the over-long line whose head was already delivered */
                discarding = false;
                line_number++;
            } else if (!callback(line_start, line_end, context, line_number++)) {
                RETIEMENT
            }
            line_start = line_end + 1;
        }

        /* Decide what to keep of the unfinished line at the end of the buffer */
        size_t pending = (size_t)(data_end - line_start);
        if (discarding) {
            pending = 0;
        } else if (pending == buffer_size) {
            /* The buffer is full and holds no newline: deliver it truncated */
            if (!callback(buffer, data_end, context, line_number)) {
                RETIEMENT
            }
            discarding = true;
            pending = 0;
        }
        /* Move remaining partial line data at the end to the beginning of the buffer */
        memmove(buffer, line_start, pending);
        data_start = buffer + pending;
    }

    close(file);
    file = -1;
    return true;
}
bool cpuinfo_arm_linux_parse_proc_cpuinfo(char* hardware, uint32_t max_processors_count,
struct cpuinfo_arm_linux_processor* processors) {
struct proc_cpuinfo_parser_state state = {
2020-07-04 01:21:30 +08:00
.hardware = hardware,
.processor_index = 0,
.max_processors_count = max_processors_count,
2020-07-04 01:21:30 +08:00
.processors = processors,
};
2020-07-04 01:21:30 +08:00
return cpuinfo_linux_parse_multiline_file("/proc/cpuinfo", BUFFER_SIZE, (cpuinfo_line_callback)parse_line, &state);
}
2020-07-04 01:21:30 +08:00
static inline int cpuinfo_android_property_get(const char* key, char* value) {
return __system_property_get(key, value);
}
/**
 * Queries the Android system properties that may identify the chipset and stores each value in the
 * field of \p properties that carries the same name.
 *
 * The results of the individual lookups are not inspected.
 * NOTE(review): properties->proc_cpuinfo_hardware is not written here - presumably the caller is
 * expected to fill it; confirm.
 *
 * @param[out] properties - structure receiving the property values.
 */
void cpuinfo_arm_android_parse_properties(struct cpuinfo_android_properties* properties) {
    /* Property name -> destination buffer, queried in this order */
    const struct {
        const char* key;
        char* value;
    } lookups[] = {
        {"ro.product.board", properties->ro_product_board},
        {"ro.board.platform", properties->ro_board_platform},
        {"ro.mediatek.platform", properties->ro_mediatek_platform},
        {"ro.arch", properties->ro_arch},
        {"ro.chipname", properties->ro_chipname},
        {"ro.hardware.chipname", properties->ro_hardware_chipname},
    };
    for (size_t i = 0; i < sizeof(lookups) / sizeof(lookups[0]); i++) {
        cpuinfo_android_property_get(lookups[i].key, lookups[i].value);
    }
}
/**
 * Loads a 16-bit little-endian value from a possibly unaligned address.
 *
 * The value is assembled byte by byte, so the result does not depend on host endianness and the
 * access is valid for any alignment of \p ptr.
 *
 * @param ptr - address of the first of two readable bytes.
 *
 * @returns byte 0 in bits 0-7 and byte 1 in bits 8-15.
 */
static inline uint16_t load_u16le(const void* ptr) {
    const uint8_t* bytes = (const uint8_t*)ptr;
    return (uint16_t)((uint16_t)bytes[0] | ((uint16_t)bytes[1] << 8));
}
/**
 * Loads a 32-bit little-endian value from a possibly unaligned address.
 *
 * The value is assembled byte by byte, so the result does not depend on host endianness and the
 * access is valid for any alignment of \p ptr (callers in this file pass odd offsets into strings).
 *
 * @param ptr - address of the first of four readable bytes.
 *
 * @returns byte 0 in bits 0-7, byte 1 in bits 8-15, byte 2 in bits 16-23 and byte 3 in bits 24-31.
 */
static inline uint32_t load_u32le(const void* ptr) {
    const uint8_t* bytes = (const uint8_t*)ptr;
    /* Widen before shifting so that the top byte never shifts into the sign bit of an int */
    return (uint32_t)bytes[0] | ((uint32_t)bytes[1] << 8) | ((uint32_t)bytes[2] << 16) |
           ((uint32_t)bytes[3] << 24);
}
/**
 * Tries to match the /Samsung Exynos ?\d{4}$/ signature for Samsung Exynos chipsets.
 * The leading 'S' and 'E' must be uppercase; the remaining letters are matched case-insensitively.
 * If match successful, extracts model information into \p chipset argument.
 *
 * @param start - start of the /proc/cpuinfo Hardware string to match.
 * @param end - end of the /proc/cpuinfo Hardware string to match.
 * @param[out] chipset - location where chipset information will be stored upon a successful match.
 *                       Left untouched when the signature does not match.
 *
 * @returns true if signature matched, false otherwise.
 */
static bool match_samsung_exynos(const char* start, const char* end, struct cpuinfo_arm_chipset* chipset) {
    /*
     * Expect 18 or 19 symbols:
     * - "Samsung" (7 symbols) + space + "Exynos" (6 symbols) + optional space + 4-digit model number
     */
    const size_t length = (size_t)(end - start);
    if (length != 18 && length != 19) {
        return false;
    }
    /*
     * Check that the string starts with "Samsung Exynos".
     * Blocks of 4 characters are loaded and compared as little-endian 32-bit word.
     * Case-insensitive characters are binary ORed with 0x20 to convert them to lowercase;
     * the first letter of each word and the separating space are compared exactly.
     */
    const uint32_t expected_sams = UINT32_C(0x20202000) | load_u32le(start);
    if (expected_sams != UINT32_C(0x736D6153) /* "smaS" = reverse("Sams") */) {
        return false;
    }
    const uint32_t expected_ung = UINT32_C(0x00202020) | load_u32le(start + 4);
    if (expected_ung != UINT32_C(0x20676E75) /* " gnu" = reverse("ung ") */) {
        return false;
    }
    const uint32_t expected_exyn = UINT32_C(0x20202000) | load_u32le(start + 8);
    if (expected_exyn != UINT32_C(0x6E797845) /* "nyxE" = reverse("Exyn") */) {
        return false;
    }
    const uint16_t expected_os = UINT16_C(0x2020) | load_u16le(start + 12);
    if (expected_os != UINT16_C(0x736F) /* "so" = reverse("os") */) {
        return false;
    }
    const char* pos = start + 14;
    /* There can be a space ' ' following the "Exynos" string */
    if (*pos == ' ') {
        pos++;
        /* If the optional space is present, we expect exactly 19 characters */
        if (length != 19) {
            return false;
        }
    }
    /* Validate and parse 4-digit model number */
    uint32_t model = 0;
    for (uint32_t i = 0; i < 4; i++) {
        const uint32_t digit = (uint32_t)(uint8_t)(*pos++) - '0';
        if (digit >= 10) {
            /* Not really a digit */
            return false;
        }
        model = model * 10 + digit;
    }
    /*
     * The model number must be the last thing in the string: a 19-character string without the
     * optional space would otherwise be accepted with one unvalidated trailing character.
     */
    if (pos != end) {
        return false;
    }
    /* Return parsed chipset */
    *chipset = (struct cpuinfo_arm_chipset){
        .vendor = cpuinfo_arm_chipset_vendor_samsung,
        .series = cpuinfo_arm_chipset_series_samsung_exynos,
        .model  = model,
    };
    return true;
}
/**
* Tries to match /exynos\d{4}$/ signature for Samsung Exynos chipsets.
* If match successful, extracts model information into \p chipset argument.
*
* @param start - start of the platform identifier (ro.board.platform or ro.chipname) to match.
* @param end - end of the platform identifier (ro.board.platform or ro.chipname) to match.
* @param[out] chipset - location where chipset information will be stored upon a successful match.
*
* @returns true if signature matched, false otherwise.
*/
static bool match_exynos(const char* start, const char* end, struct cpuinfo_arm_chipset* chipset) {
/* Expect exactly 10 symbols: "exynos" (6 symbols) + 4-digit model number */
if (start + 10 != end) {
return false;
}
/* Load first 4 bytes as little endian 32-bit word */
const uint32_t expected_exyn = load_u32le(start);
if (expected_exyn != UINT32_C(0x6E797865) /* "nyxe" = reverse("exyn") */) {
return false;
}
/* Load next 2 bytes as little endian 16-bit word */
const uint16_t expected_os = load_u16le(start + 4);
if (expected_os != UINT16_C(0x736F) /* "so" = reverse("os") */) {
return false;
}
/* Check and parse 4-digit model number */
uint32_t model = 0;
for (uint32_t i = 6; i < 10; i++) {
const uint32_t digit = (uint32_t)(uint8_t)start[i] - '0';
if (digit >= 10) {
/* Not really a digit */
return false;
}
model = model * 10 + digit;
}
/* Return parsed chipset. */
*chipset = (struct cpuinfo_arm_chipset){
.vendor = cpuinfo_arm_chipset_vendor_samsung,
.series = cpuinfo_arm_chipset_series_samsung_exynos,
.model = model,
};
return true;
}
/**
* Tries to match /universal\d{4}$/ signature for Samsung Exynos chipsets.
* If match successful, extracts model information into \p chipset argument.
*
* @param start - start of the platform identifier (/proc/cpuinfo Hardware string, ro.product.board or ro.chipname)
* to match.
* @param end - end of the platform identifier (/proc/cpuinfo Hardware string, ro.product.board or ro.chipname)
* to match.
* @param[out] chipset - location where chipset information will be stored upon a successful match.
*
* @returns true if signature matched, false otherwise.
*/
static bool match_universal(const char* start, const char* end, struct cpuinfo_arm_chipset* chipset) {
/* Expect exactly 13 symbols: "universal" (9 symbols) + 4-digit model number */
if (start + 13 != end) {
return false;
}
/*
* Check that the string starts with "universal".
* Blocks of 4 characters are loaded and compared as little-endian 32-bit word.
* Case-insensitive characters are binary ORed with 0x20 to convert them to lowercase.
*/
const uint8_t expected_u = UINT8_C(0x20) | (uint8_t)start[0];
if (expected_u != UINT8_C(0x75) /* "u" */) {
return false;
}
const uint32_t expected_nive = UINT32_C(0x20202020) | load_u32le(start + 1);
if (expected_nive != UINT32_C(0x6576696E) /* "evin" = reverse("nive") */) {
return false;
}
const uint32_t expected_ersa = UINT32_C(0x20202020) | load_u32le(start + 5);
if (expected_ersa != UINT32_C(0x6C617372) /* "lasr" = reverse("rsal") */) {
return false;
}
/* Validate and parse 4-digit model number */
uint32_t model = 0;
for (uint32_t i = 9; i < 13; i++) {
const uint32_t digit = (uint32_t)(uint8_t)start[i] - '0';
if (digit >= 10) {
/* Not really a digit */
return false;
}
model = model * 10 + digit;
}
/* Return parsed chipset. */
*chipset = (struct cpuinfo_arm_chipset){
.vendor = cpuinfo_arm_chipset_vendor_samsung,
.series = cpuinfo_arm_chipset_series_samsung_exynos,
.model = model,
};
return true;
}
/**
 * Decodes the chipset from the /proc/cpuinfo Hardware string.
 * Only the "Samsung Exynos" and "universal" signatures are recognized.
 *
 * @param hardware - Hardware string; at most CPUINFO_HARDWARE_VALUE_MAX characters are examined.
 * @param cores - not used by this implementation.
 * @param max_cpu_freq_max - not used by this implementation.
 *
 * @returns the matched chipset, or a chipset with unknown vendor and series when nothing matched.
 */
struct cpuinfo_arm_chipset cpuinfo_arm_linux_decode_chipset_from_proc_cpuinfo_hardware(const char* hardware,
                                                                                        uint32_t cores,
                                                                                        uint32_t max_cpu_freq_max) {
    (void)cores;
    (void)max_cpu_freq_max;
    struct cpuinfo_arm_chipset chipset;
    const char* const hardware_end = hardware + strnlen(hardware, CPUINFO_HARDWARE_VALUE_MAX);
    /* Signatures are tried in order; the first one that matches fills in the chipset */
    const bool matched = match_samsung_exynos(hardware, hardware_end, &chipset) ||
                         match_universal(hardware, hardware_end, &chipset);
    if (!matched) {
        return (struct cpuinfo_arm_chipset){
            .vendor = cpuinfo_arm_chipset_vendor_unknown,
            .series = cpuinfo_arm_chipset_series_unknown,
        };
    }
    return chipset;
}
/**
 * Decodes the chipset from the ro.product.board system property.
 * Only the "universal" signature is recognized.
 *
 * @param ro_product_board - property value; at most CPUINFO_BUILD_PROP_VALUE_MAX characters are examined.
 * @param cores - not used by this implementation.
 * @param max_cpu_freq_max - not used by this implementation.
 *
 * @returns the matched chipset, or a chipset with unknown vendor and series when nothing matched.
 */
struct cpuinfo_arm_chipset cpuinfo_arm_android_decode_chipset_from_ro_product_board(const char* ro_product_board,
                                                                                     uint32_t cores,
                                                                                     uint32_t max_cpu_freq_max) {
    (void)cores;
    (void)max_cpu_freq_max;
    struct cpuinfo_arm_chipset chipset;
    const char* const board_end = ro_product_board + strnlen(ro_product_board, CPUINFO_BUILD_PROP_VALUE_MAX);
    if (!match_universal(ro_product_board, board_end, &chipset)) {
        return (struct cpuinfo_arm_chipset){
            .vendor = cpuinfo_arm_chipset_vendor_unknown,
            .series = cpuinfo_arm_chipset_series_unknown,
        };
    }
    return chipset;
}
/**
 * Decodes the chipset from the ro.board.platform system property.
 * Only the "exynos" signature is recognized.
 *
 * @param platform - property value; at most CPUINFO_BUILD_PROP_VALUE_MAX characters are examined.
 * @param cores - not used by this implementation.
 * @param max_cpu_freq_max - not used by this implementation.
 *
 * @returns the matched chipset, or a chipset with unknown vendor and series when nothing matched.
 */
struct cpuinfo_arm_chipset cpuinfo_arm_android_decode_chipset_from_ro_board_platform(const char* platform,
                                                                                      uint32_t cores,
                                                                                      uint32_t max_cpu_freq_max) {
    (void)cores;
    (void)max_cpu_freq_max;
    struct cpuinfo_arm_chipset chipset;
    const char* const platform_end = platform + strnlen(platform, CPUINFO_BUILD_PROP_VALUE_MAX);
    if (!match_exynos(platform, platform_end, &chipset)) {
        return (struct cpuinfo_arm_chipset){
            .vendor = cpuinfo_arm_chipset_vendor_unknown,
            .series = cpuinfo_arm_chipset_series_unknown,
        };
    }
    return chipset;
}
struct cpuinfo_arm_chipset cpuinfo_arm_android_decode_chipset_from_ro_mediatek_platform(const char* platform) {
return (struct cpuinfo_arm_chipset){
.vendor = cpuinfo_arm_chipset_vendor_unknown,
.series = cpuinfo_arm_chipset_series_unknown,
};
}
/**
 * Decodes the chipset from the ro.arch system property.
 * Only the Samsung "exynosXXXX" signature is recognized.
 *
 * @param arch - property value; at most CPUINFO_BUILD_PROP_VALUE_MAX characters are examined.
 *
 * @returns the matched chipset, or a chipset with unknown vendor and series when nothing matched.
 */
struct cpuinfo_arm_chipset cpuinfo_arm_android_decode_chipset_from_ro_arch(const char* arch) {
    struct cpuinfo_arm_chipset chipset;
    const size_t arch_length   = strnlen(arch, CPUINFO_BUILD_PROP_VALUE_MAX);
    const char* const arch_end = arch + arch_length;
    if (!match_exynos(arch, arch_end, &chipset)) {
        return (struct cpuinfo_arm_chipset){
            .vendor = cpuinfo_arm_chipset_vendor_unknown,
            .series = cpuinfo_arm_chipset_series_unknown,
        };
    }
    return chipset;
}
/**
 * Decodes the chipset from a chip-name system property (ro.chipname style value).
 * The "exynos" signature is tried first, then the "universal" signature.
 *
 * @param chipname - property value; at most CPUINFO_BUILD_PROP_VALUE_MAX characters are examined.
 *
 * @returns the matched chipset, or a chipset with unknown vendor and series when nothing matched.
 */
struct cpuinfo_arm_chipset cpuinfo_arm_android_decode_chipset_from_ro_chipname(const char* chipname) {
    struct cpuinfo_arm_chipset chipset;
    const char* const chipname_end = chipname + strnlen(chipname, CPUINFO_BUILD_PROP_VALUE_MAX);
    /* Signatures are tried in order; the first one that matches fills in the chipset */
    const bool matched =
        match_exynos(chipname, chipname_end, &chipset) || match_universal(chipname, chipname_end, &chipset);
    if (!matched) {
        return (struct cpuinfo_arm_chipset){
            .vendor = cpuinfo_arm_chipset_vendor_unknown,
            .series = cpuinfo_arm_chipset_series_unknown,
        };
    }
    return chipset;
}
/**
 * Decodes the chipset by consulting every identification string in \p properties.
 *
 * Each string is decoded independently. Detection fails (unknown vendor and series are returned)
 * when no string matched a known signature or when two strings report different vendors.
 * Otherwise the first decoded entry, in property order, with a known series is returned.
 *
 * This is only used to detect Samsung Exynos chipsets, so the vendor is currently either
 * cpuinfo_arm_chipset_vendor_samsung or cpuinfo_arm_chipset_vendor_unknown.
 *
 * @param properties - /proc/cpuinfo Hardware string and Android system properties to decode.
 * @param cores - forwarded to the per-property decoders that accept it.
 * @param max_cpu_freq_max - forwarded to the per-property decoders that accept it.
 *
 * @returns the detected chipset, or a chipset with unknown vendor and series on failure.
 */
struct cpuinfo_arm_chipset cpuinfo_arm_android_decode_chipset(const struct cpuinfo_android_properties* properties,
                                                              uint32_t cores, uint32_t max_cpu_freq_max) {
    /* Result reported whenever detection fails */
    struct cpuinfo_arm_chipset chipset = {
        .vendor = cpuinfo_arm_chipset_vendor_unknown,
        .series = cpuinfo_arm_chipset_series_unknown,
    };
    /* Decode every identification string on its own, indexed by the property it came from */
    struct cpuinfo_arm_chipset chipsets[cpuinfo_android_chipset_property_max] = {
        [cpuinfo_android_chipset_property_proc_cpuinfo_hardware] =
            cpuinfo_arm_linux_decode_chipset_from_proc_cpuinfo_hardware(properties->proc_cpuinfo_hardware, cores,
                                                                        max_cpu_freq_max),
        [cpuinfo_android_chipset_property_ro_product_board] = cpuinfo_arm_android_decode_chipset_from_ro_product_board(
            properties->ro_product_board, cores, max_cpu_freq_max),
        [cpuinfo_android_chipset_property_ro_board_platform] =
            cpuinfo_arm_android_decode_chipset_from_ro_board_platform(properties->ro_board_platform, cores,
                                                                      max_cpu_freq_max),
        [cpuinfo_android_chipset_property_ro_mediatek_platform] =
            cpuinfo_arm_android_decode_chipset_from_ro_mediatek_platform(properties->ro_mediatek_platform),
        [cpuinfo_android_chipset_property_ro_arch] =
            cpuinfo_arm_android_decode_chipset_from_ro_arch(properties->ro_arch),
        [cpuinfo_android_chipset_property_ro_chipname] =
            cpuinfo_arm_android_decode_chipset_from_ro_chipname(properties->ro_chipname),
        /* ro.hardware.chipname carries the same kind of value as ro.chipname and shares its decoder */
        [cpuinfo_android_chipset_property_ro_hardware_chipname] =
            cpuinfo_arm_android_decode_chipset_from_ro_chipname(properties->ro_hardware_chipname),
    };
    /* All properties that decoded to a known vendor must agree on that vendor */
    enum cpuinfo_arm_chipset_vendor vendor = cpuinfo_arm_chipset_vendor_unknown;
    for (size_t i = 0; i < cpuinfo_android_chipset_property_max; ++i) {
        const enum cpuinfo_arm_chipset_vendor decoded_vendor = chipsets[i].vendor;
        if (decoded_vendor != cpuinfo_arm_chipset_vendor_unknown) {
            if (vendor == cpuinfo_arm_chipset_vendor_unknown) {
                vendor = decoded_vendor;
            } else if (vendor != decoded_vendor) {
                /* Different vendors reported by different properties: fail silently with "unknown" */
                return chipset;
            }
        }
    }
    if (vendor == cpuinfo_arm_chipset_vendor_unknown) {
        /* None of the properties matched a known signature: fail silently with "unknown" */
        return chipset;
    }
    /* Report the first decoded entry that carries a known series */
    for (size_t i = 0; i < cpuinfo_android_chipset_property_max; ++i) {
        if (chipsets[i].series != cpuinfo_arm_chipset_series_unknown) {
            chipset = chipsets[i];
            break;
        }
    }
    return chipset;
}
#endif // __ANDROID__
#if defined(__APPLE__) && defined(__aarch64__)
/**
 * Reads a 32-bit integer sysctl value by name.
 *
 * The value size is queried first; only values that are exactly sizeof(uint32_t) bytes are read.
 * Every failure is logged through MNN_PRINT and reported as 0.
 *
 * @param type_specifier - sysctl name to look up (e.g. "hw.cpufamily").
 *
 * @returns the value of the sysctl entry, or 0 if it could not be read as a 32-bit integer.
 */
static uint32_t get_sys_info_by_name(const char* type_specifier) {
    size_t size     = 0;
    uint32_t result = 0;
    /* A NULL output buffer makes sysctlbyname report only the size of the value */
    if (sysctlbyname(type_specifier, NULL, &size, NULL, 0) != 0) {
        MNN_PRINT("sysctlbyname(\"%s\") failed\n", type_specifier);
        return 0;
    }
    if (size != sizeof(uint32_t)) {
        MNN_PRINT("sysctl does not support non-integer lookup for (\"%s\")\n", type_specifier);
        return 0;
    }
    /* The read itself can fail as well; never report a value that was not actually fetched */
    if (sysctlbyname(type_specifier, &result, &size, NULL, 0) != 0) {
        MNN_PRINT("sysctlbyname(\"%s\") failed\n", type_specifier);
        return 0;
    }
    MNN_PRINT("%s: %u , size = %zu\n", type_specifier, result, size);
    return result;
}
#endif // __APPLE__ && __aarch64__
void cpuinfo_arm_init(struct cpuinfo_arm_isa* cpuinfo_isa) {
memset(cpuinfo_isa, 0, sizeof(struct cpuinfo_arm_isa));
2020-07-04 01:21:30 +08:00
// android
#ifdef __ANDROID__
struct cpuinfo_arm_linux_processor* arm_linux_processors = NULL;
2020-07-04 01:21:30 +08:00
const uint32_t processors_count = getNumberOfCPU();
char proc_cpuinfo_hardware[CPUINFO_HARDWARE_VALUE_MAX] = {0};
arm_linux_processors = static_cast<struct cpuinfo_arm_linux_processor*>(
calloc(processors_count, sizeof(struct cpuinfo_arm_linux_processor)));
if (arm_linux_processors == NULL) {
MNN_PRINT("failed to allocate %zu bytes for descriptions of %u ARM logical processors\n",
processors_count * sizeof(struct cpuinfo_arm_linux_processor), processors_count);
return;
}
2020-07-04 01:21:30 +08:00
if (!cpuinfo_arm_linux_parse_proc_cpuinfo(proc_cpuinfo_hardware, processors_count, arm_linux_processors)) {
MNN_PRINT("failed to parse processor information from /proc/cpuinfo\n");
return;
}
2020-07-04 01:21:30 +08:00
uint32_t valid_processor_mask = 0;
2020-07-04 01:21:30 +08:00
for (uint32_t i = 0; i < processors_count; i++) {
if (bitmask_all(arm_linux_processors[i].flags, valid_processor_mask)) {
arm_linux_processors[i].flags |= CPUINFO_LINUX_FLAG_VALID;
}
}
uint32_t valid_processors = 0, last_midr = 0;
for (uint32_t i = 0; i < processors_count; i++) {
arm_linux_processors[i].system_processor_id = i;
2020-07-04 01:21:30 +08:00
if (bitmask_all(arm_linux_processors[i].flags, CPUINFO_LINUX_FLAG_VALID)) {
valid_processors += 1;
if (bitmask_all(arm_linux_processors[i].flags, CPUINFO_ARM_LINUX_VALID_MIDR)) {
last_midr = arm_linux_processors[i].midr;
}
}
}
2020-07-04 01:21:30 +08:00
[MNN:Sync] Sync internal github Commits: 8148ae75c 弗人 bugfix 14cb8ec7f 弗人 [Converter:Bugfix] bugfix for onnx depthwise convtranspose 476fbcd90 雁行 [MNN:Feature] Open AVX cast and bugfix for contentCFG. 5e26b9fd3 雁行 [Test:Feature] Add android test. 37e147b25 雁行 [MNN:Bugfix] Bugfix for floordiv. 144c185f5 tianbu.xsw hangxing fix hiai b4fd429d6 tianbu.xsw updateCacheFile bugfix -- update cache size d4ba572a8 雁行 [MNN:Bugfix] Support int8 in AVX2 and some Bugfix. 43061f07e xiaying [MNN:Bugfix] Fix bug for module mode run part of model 398cc5ab6 tianhang.yth refactor demo 736380600 xiaying [Express:Bugfix] Fix memory leak for copy branch b8dab0a27 tianhang.yth MNNFloat2Int8 sizeQuad=0 crash fix 94b95bfed ghz [BugFix]1.Better method for fast pack valid check 6a921f85e xiaying [Converter:Bugfix] Fix bug for Fuseconsttosubgraph 5f77ae889 tianhang.yth numThread bugfix a807ef879 tianhang.yth add createSession(configs, runtimeinfo) API, add pymnn demo, pymnn logcat bugfix ad05409d3 xiaying [MNN:Bugfix] Fix bug for StaticModule's sizecompute overflow, add error print for module mode 9d81b8299 xiaying [MNN:Bugfix] Fix bug for Unique op for output size = 1 03b15e9af xiaying [Test:Feature] Add MatMulBConst Test, Fix bug for single Convert c944a76ee tianhang.yth add auto backend and getSessionInfo @tianbu 91fa7267b ghz [BugFix]1.fix the error in eP check bf0041f77 ghz [BugFix]1.Fix the logic error in eP check. 2.Fix the sp align error 693871672 雁行 [CPU:Bugfix] rm adrp instruction for clang compiler bug. 1b8f6b3d8 ghz 1.Fix the wronly use of r13 in arm32 version. 2.Fix the missing callee register save and restore process. feb7ecc4c 弗人 modify log of python offline quant 040c04811 ghz [BufFix]1.replace platform-related regs. 2.fix the same problem in arm32 version 609f37db8 弗人 add log for python quant, python convert 5511dd30a ghz [BugFix]1.Add testcases in SparseConv to check all functional code branch. 2. 
Fix the bug in "MNNPackC4ForMatMul_A.S" in arm64, which is caused by the missing check of eReal parameter. a93ff9280 tianhang.yth add tf.Unique op support 9729ff773 allen.lk [Bugfix] Fix one arm32 instruction syntax that clang works but gcc DOES NOT work. use index instruction instead. 297c1ad14 雁行 [Expr:Bugfix] bugfix for tensor content used by shape compute. ef8c369e3 弗人 catch exception 07c2dd670 弗人 add dependence to setup, base64 encode url, add time log 177e590c1 弗人 [Python:Feature] add aliyun log for python quant tool 40a7928cf allen.lk [Debug:Sparse] 1.Add group parameter in torchscript converter. 2. Stop split running to avoid memory corruption when check failed in TransformGroupConvolution 3. fix Op split issue in TransformGroupConvolution 3bdea84a1 allen.lk [Debug:Sparse] Fix and warning one kind of segmentfault cause by memory corruption when resize ConvolutionWinograd. Avoid to use some registers as arm restriction. c3c6fbdbd allen.lk [Debug:Sparse] Fix and warning one kind of segmentfault cause by memory corruption when resize ConvolutionWinograd. Avoid to use some registers as arm restriction. bc590eee4 雁行 [Converter:Bugfix] bugfix for onnx instancenormalization convert. d8918593f tianhang.yth add auto backend and getSessionInfo @tianbu 83a198ed7 杭行 update d0dd3e09b 杭行 update 99540202e xiaying [Converter:Optimize] Opt the tensor convert insert 333d8db82 allen.lk [Debug:Sparse] Fix All platform-register r9 / x18 issue on arm32 and arm64. 
db5994672 杭行 merge 6293de7b8 tianbu.xsw fix pymnn updateCacheFile 5c2e11cb1 tianbu.xsw do updateCache in createSession 6e7641ff4 tianbu.xsw do not limit cacheFile for a model 5287a65e4 tianbu.xsw bugfix 52ba53a91 tianbu.xsw revert pymnn api 60284d830 tianbu.xsw bugfix 6d8077490 tianbu.xsw rename updateCacheFile api params 3cb172710 tianhang.yth updateCacheFile API size default value is 0 c5b69aabf tianbu.xsw updateCacheFile python api fix 5d5da7aa5 tianbu.xsw reflector code 5707877a4 雁行 [MNN:Speed] Speedup for softmax in x86 and arm. 2a211825c tianbu.xsw reflector code for updateCacheFile 76db3a835 tianbu.xsw [Cache Feature]: Add updateCacheFile API for increment cache b06b0fd43 allen.lk [Debug:Sparse] Fix and warning one kind of segmentfault cause by memory corruption when resize ConvolutionWinograd. Avoid to use some registers as arm restriction. e68bfa495 雁行 [Converter:Feature] Add UUID when model convert. a9cb935dc xiaying [MNN:Speed] Support c4nhwc for more fastblit 019f40353 xiaying [Converter:Refractor] Reduce memory used by MNNConvert(bert from 5G -> 1G) d2a6d3d05 xiaying [MNN:Bugfix] Fix bug for identity output not find 604d0801b xiaying [Converter:Bugfix] Fix bug for FuseGeLu 4bada2367 xiaying [MNN:Refractor] SegmentMean rewrite as segment 82070e708 xiaying [MNN:Bugfix] Fix bug for GeometryBinary e8ea4266e xiaying Fix bug for ShapeTensorConvert compute for dim = 1 error 1f1cf1991 xiaying [Tools:Bugfix] Fix system compability for fastTestOnnx 6f422efe2 xiaying [Tools:Bugfix] Remove color for checkDir for easy to dump 968f7ec88 xiaying [MNN:Speed] Support turn broadcast binary to loop 3e7aaf46f xiaying [MNN:Refractor] Set Convolution1x1Strassen support variable input/output ptr 1f65ab163 xiaying [MNN:Bugfix] Fix bug for mini mnn can't convert model d65953d47 xiaying [MNN:Bugfix] Fix bug for armv7a - android-14 + ARM82 8b68be45c xiaying [MNN:Feature] Add segment 8a8f264f5 xiaying [Vulkan:Bugfix] Remove unuseful print 025bb0fda xiaying [Converter:Bugfix] Fix 
bug for oneof don't support 43900251e tianbu.xsw enable setCacheFile python API ebfb05c74 tianbu.xsw [Metal Feature] support metallib obtain from walle transfer task 9665c0a79 弗人 add check for path in json file c66fef224 xiaying [Converter:Bugfix] Fix bug for oneof don't support 42f192852 xiaying [MNN:Bugfix] Fix bug for not set output / saveTensor into origin Schedule's outputs 1b95354ff 雁行 [Feature]: Support shape compute for SetDiff1D, and null input for Prod. 83966d043 xiaying [Test:Feature] Add test for static module 42d1be933 xiaying [Converter:Bugfix] Fix bug for mnn convert and static model add more outputs for origin model 9067531c3 xiaying [Converter:Refractor] formatLicence 99558bed9 xiaying [Converter:Bugfix] Count the op for unuseful and controlflow 4f6da0fa7 allen.lk [Feature:GRUMultiOutput] fix multi output dimension type c6b219bce xiaying [Converter:Feature] Turn torch converter to object dd4e68a37 xiaying [Converter:Feature] Support dump supported ops 80b6a60a3 xiaying [Converter:Info] If has output name, print output name instead of computed 015278fc3 xiaying [MNN:Refractor] Revert IfModule's debug info 23ac967c4 xiaying Don't transform for multi-input convolution/deconvolution b02b0d4de xiaying Fix bug for multi-input for conv1d 254d8b1d4 xiaying Fix bug for Conv1dSqueezeMove for multi input convolution 1d d47d0b9ca xiaying Fix bug for CPURaster's fuse nc4hw4 357c5bd33 xiaying Fix ConvBiasAdd for conv's inputs op > 1 55b1f0c9c xiaying [Converter:Bugfix] Don't transform for multi-input convolution/deconvolution 1902a30f5 xiaying [Converter:Bugfix] Fix bug for Conv1dSqueezeMove for multi input convolution 1d c23fe617b xiaying [MNN:Bugfix] Fix bug for multi-input for conv1d 8ff018426 xiaying [MNN:Bugfix] Fix bug for CPURaster's fuse nc4hw4 d4e8cd602 xiaying [Converter:Bugfix] Fix ConvBiasAdd for conv's inputs op > 1 846266b42 tianbu.xsw return when program and tune both nullptr fd67c76a9 xiaying [Converter:Bugfix] DepthwiseConvWeightMerge only valid 
for tflite e77a242c4 xiaying [Converter:Feature] Support tflite's half pixel be054c377 tianbu.xsw [OpenCL Bugfix] do not rewrite cache when binary program is produced 51e65aa35 xiaying [Converter:Feature] Support tflite for fp16 and multi-input convolution 1ccdfdeb5 tianbu.xsw redefine svm macro name 31234d372 tianbu.xsw [OpenCL SVM] add macro for only use wrapper d739e35da xiaying [MNN:Bugfix] Fix compile bug for grid op 24ab13c79 Joker feat(arm82): add GridSample op support in arm82 backend, AVX(by xiaying) 7b142978e xiaying [AVX512:Speed] Optimize for e <= 8 5f6febe7b tianbu.xsw code refactor 998d91b57 xiaying [Express:Speed] Merge submodule for speed 22c89146f tianhang.yth fix alpha div by zero bug and arm server compile bug 8f829a170 tianbu.xsw [OpenCL Pad] unify conv/deconv pad computing 4a28f603e xiaying [Express:Speed] Shared Const for All Submodule c74cf28f3 xiaying [MNN:Refractor] Seperate Const init and schedule 2a1eebb7a xiaying [Tools:Bugfix] Fix bug for modelTest.py count size 72f04008c xiaying [MNN:Refractor] Delete unuseful const op 1e735d03c xiaying [Converter:Bugfix] Fix bug for static module gen 4dfadbc6e xiaying [MNN:Refractor] Rewrite const init mode 1fcf0417a xiaying [MNN:Bugfix] Fix bug for deconvolutin multi-input for multi-batch 41d429cfd xiaying [Train:Bugfix] Revert convert NCHW for mnistTrain f947a5f01 xiaying [Test:Feature] Add testTrain dad59b6f6 tianbu.xsw move realize code from Backend.hpp to Tensor.cpp cf4473ad1 xiaying [Train:Bugfix] Support pad for GeometryPoolGrad 91ab13734 xiaying [MNN:Bugfix] Fix compile bug for avx512 742e80f47 xiaying [MNN:Refractor] Opt the logic for checknan judge 12543b841 xiaying [ARM82:Bugfix] Fix compile bug for ios 3a2b0a49f xiaying [ARM82:Speed] Opt Pack / Unpack for armv8 c0f1995cd xiaying [ARM82:Speed] Opt MNNPackC8FP16 and MNNUnpackC8FP16 by asm e0fc77dcf xiaying [MNN:Speed] Fix bug for DeconvolutionWithStride for C4HW4, open it 584bec578 xiaying [MNN:Bugfix] Fix bug for format set error for onnx 
d5bd4148d xiaying [MNN:Bugfix] Fix bug for format set error for onnx b00265841 xiaying [MNN:Bugfix] Fix bug for SparseConvolutionTiledExecutor bb09188ac xiaying [Test:Bugfix] Fix bug for run into sparse auto 426d1babd xiaying [MNN:Refractor] Small bugfix for Group convolution and pack 7d0ea1c46 tianbu.xsw [testModel Feature] support testModel.out input resize 4169c54ce xiaying [MNN:Bugfix] Fix bug for checkNAN for origin 412a82222 xiaying [Test:Bugfix] Fix bug for CheckNAN's error of matmul 319b1d425 xiaying [MNN:Bugfix] Fix bug for multi-batch for ConvInt8 050b728a6 xiaying [Test:Bugfix] Use NCHW for ConvInt8Test 7db3423a1 xiaying [OpenCL:Bugfix] Fix bug for opencl::image,opencl::buffer for C4HW4 adcec6a7f xiaying [Vulkan:Bugfix] Fix bug for invalid tensor size limit d2a7cf4e9 xiaying [Vulkan:Bugfix] Fix bug for onCopyBuffer of nc4hw4 557bebdd3 xiaying [MNN:Bugfix] Fix bug for BF16-ARM32 bbe186649 tianbu.xsw [Update AUTO mode]: fix MNN_FORWARD_AUTO choose priority 6deb23439 xiaying [MNN:Bugfix] Fix bug for GeometryBinary don't care about NC4HW4 same size b137590e4 xiaying [MNN:Bugfix] Fix bug for GeometryBinary don't care about NC4HW4 same size 7003558ea xiaying [Converter:Bugfix] Fix bug for onnx pad for serveral case b5f8cae5a xiaying [Converter:Bugfix] Fix bug for onnx pad for serveral case 29b09e125 xiaying [MNN:Bugfix] Fix bug for arm64-bf16 42ce00770 xiaying [MNN:Bugfix] Fix bug for ARM64 - float a2d89fc18 雁行 [Converter:Feature] Support Binary Unary for Torch. 7f1c0deb1 xiaying [MNN:Bugfix] Fix bug for Raster for Int8 8335a6f18 tianbu.xsw [OpenCL Shared Memory] modify data_format method b359e031b xiaying [ARM82:Bugfix] Fix bug for arm82 and speed up pack / unpack c8 24bf3fc88 雁行 [Convert:Feature] Support LayerNormFuse without gamma beta. 
3e629624b xiaying [MNN:Bugfix] Fix bug for float - armv7a 2b7908ec7 tianbu.xsw modify workItemSize 3cee0d413 xiaying [MNN:Bugfix] test wrong clear 9cbbfb998 xiaying [MNN:Bugfix] fix compile bug for c++ < 14 2d7a44484 xiaying [MNN:Bugfix] fix compile bug for c++ < 14 eb7d0cb53 xiaying [Test:Bugfix] Don't test for NC4HW4 directly 7b40ca8d1 xiaying [MNN:Bugfix] Fix bug for ConvolutionGroup 2694d8a91 xiaying [MNN:Bugfix] Fix bug for CPUGridSample f89af60f6 xiaying [MNN:Bugfix] Fix compile bug for arm a151abcdd xiaying [MNN:Bugfix] Fix bug for convert for int8 / int16 b254dbe61 雁行 [MNN:Bugfix] Bugfix for Conv onClone. d08150631 xiaying [MNN:Bugfix] Fix bug for fast rcnn e5568a0df xiaying [MNN:Bugfix] Fix bug for CPURaster treat NC4HW4 fast blit 128318933 雁行 [Raster:Bugfix] bugfix for Raster merge onResize. 03caacbea xiaying [MNN:Bugfix] fix bug for CPUDeconvolution and Convolution1x1Strassen for iw != ow e1e3c245c xiaying [MNN:Bugfix] Fix bug for ConvolutionWinograd 2524cbc6d xiaying [MNN:Bugfix] Fix bug for CPUSoftmax 44ec79b8f xiaying [MNN:Bugfix] Fix bug for CPUConvolutionDepthwise / Scale / DeconvolutionDW 21ae956ce xiaying [MNN:Bugfix] Fix bug for Multi-Batch-TiledExecutor 09a5069c7 xiaying [MNN:Speed] Add offset for src and dst 6776c6784 xiaying [MNN:Bugfix] Fix bug for trainable model cc83ae30b xiaying [MNN:Bugfix] Fix bug for trainable model
2021-07-29 11:46:59 +08:00
uint32_t isa_features = 0;
#ifdef __aarch64__
isa_features = (uint32_t)getauxval(AT_HWCAP);
#endif
2020-07-04 01:21:30 +08:00
struct cpuinfo_android_properties android_properties;
cpuinfo_arm_android_parse_properties(&android_properties);
const struct cpuinfo_arm_chipset chipset =
cpuinfo_arm_android_decode_chipset(&android_properties, valid_processors, 0);
switch (last_midr & (CPUINFO_ARM_MIDR_IMPLEMENTER_MASK | CPUINFO_ARM_MIDR_PART_MASK)) {
case UINT32_C(0x51008040): /* Kryo 485 Gold (Cortex-A76) */
cpuinfo_isa->dot = true;
break;
default:
2021-04-08 15:34:23 +08:00
#ifdef __aarch64__
if (isa_features & CPUINFO_ARM_LINUX_FEATURE_ASIMDDP) {
cpuinfo_isa->dot = true;
}
2021-04-08 15:34:23 +08:00
#endif
// TODO, whitelist, ex: hisilicon_kirin 980...
break;
}
2021-04-08 15:34:23 +08:00
#ifdef __aarch64__
const uint32_t fp16arith_mask = CPUINFO_ARM_LINUX_FEATURE_FPHP | CPUINFO_ARM_LINUX_FEATURE_ASIMDHP;
2020-07-04 01:21:30 +08:00
if ((isa_features & fp16arith_mask) == fp16arith_mask) {
if (chipset.series == cpuinfo_arm_chipset_series_samsung_exynos && chipset.model == 9810) {
cpuinfo_isa->fp16arith = false;
} else {
cpuinfo_isa->fp16arith = true;
}
}
2022-10-30 08:44:24 +08:00
if (isa_features & CPUINFO_ARM_LINUX_FEATURE_I8MM) {
cpuinfo_isa->i8mm = true;
}
/*
if (isa_features & CPUINFO_ARM_LINUX_FEATURE_SVE2) {
// MNN_PRINT("Support SVE2\n");
}
*/
2021-04-08 15:34:23 +08:00
#else
// pytorch/cpuinfo: src/arm/linux/aarch32-isa.c
uint32_t architecture_version = 0;
if (processors_count > 0) {
architecture_version = arm_linux_processors[0].architecture_version;
}
if (architecture_version >= 8) {
/*
* NEON FP16 compute extension and VQRDMLAH/VQRDMLSH instructions are not indicated in /proc/cpuinfo.
* Use a MIDR-based heuristic to whitelist processors known to support it:
* - Processors with Cortex-A55 cores
* - Processors with Cortex-A65 cores
* - Processors with Cortex-A75 cores
* - Processors with Cortex-A76 cores
* - Processors with Cortex-A77 cores
* - Processors with Exynos M4 cores
* - Processors with Exynos M5 cores
* - Neoverse N1 cores
*/
if (chipset.series == cpuinfo_arm_chipset_series_samsung_exynos && chipset.model == 9810) {
/* Only little cores of Exynos 9810 support FP16 & RDM */
MNN_PRINT("FP16 arithmetics and RDM disabled: only little cores in Exynos 9810 support these extensions");
} else {
switch (last_midr & (CPUINFO_ARM_MIDR_IMPLEMENTER_MASK | CPUINFO_ARM_MIDR_PART_MASK)) {
case UINT32_C(0x4100D050): /* Cortex-A55 */
case UINT32_C(0x4100D060): /* Cortex-A65 */
case UINT32_C(0x4100D0B0): /* Cortex-A76 */
case UINT32_C(0x4100D0C0): /* Neoverse N1 */
case UINT32_C(0x4100D0D0): /* Cortex-A77 */
case UINT32_C(0x4100D0E0): /* Cortex-A76AE */
case UINT32_C(0x4800D400): /* Cortex-A76 (HiSilicon) */
case UINT32_C(0x51008020): /* Kryo 385 Gold (Cortex-A75) */
case UINT32_C(0x51008030): /* Kryo 385 Silver (Cortex-A55) */
case UINT32_C(0x51008040): /* Kryo 485 Gold (Cortex-A76) */
case UINT32_C(0x51008050): /* Kryo 485 Silver (Cortex-A55) */
case UINT32_C(0x53000030): /* Exynos M4 */
case UINT32_C(0x53000040): /* Exynos M5 */
cpuinfo_isa->fp16arith = true;
break;
}
}
/*
* NEON VDOT instructions are not indicated in /proc/cpuinfo.
* Use a MIDR-based heuristic to whitelist processors known to support it.
*/
switch (last_midr & (CPUINFO_ARM_MIDR_IMPLEMENTER_MASK | CPUINFO_ARM_MIDR_PART_MASK)) {
case UINT32_C(0x4100D0B0): /* Cortex-A76 */
case UINT32_C(0x4100D0D0): /* Cortex-A77 */
case UINT32_C(0x4100D0E0): /* Cortex-A76AE */
case UINT32_C(0x4800D400): /* Cortex-A76 (HiSilicon) */
case UINT32_C(0x51008040): /* Kryo 485 Gold (Cortex-A76) */
case UINT32_C(0x51008050): /* Kryo 485 Silver (Cortex-A55) */
case UINT32_C(0x53000030): /* Exynos-M4 */
case UINT32_C(0x53000040): /* Exynos-M5 */
cpuinfo_isa->dot = true;
break;
case UINT32_C(0x4100D050): /* Cortex A55: revision 1 or later only */
cpuinfo_isa->dot = (midr_get_variant(last_midr) >= 1);
break;
case UINT32_C(0x4100D0A0): /* Cortex A75: revision 2 or later only */
cpuinfo_isa->dot = (midr_get_variant(last_midr) >= 2);
break;
}
}
#endif
2021-10-15 13:39:20 +08:00
if (arm_linux_processors) {
free(arm_linux_processors);
}
2020-07-04 01:21:30 +08:00
#endif // #ifdef __ANDROID__
2020-07-04 01:21:30 +08:00
// iOS
#if defined(__IOS__) && defined(__aarch64__)
// A11
#ifndef CPUFAMILY_ARM_MONSOON_MISTRAL
2020-07-04 01:21:30 +08:00
#define CPUFAMILY_ARM_MONSOON_MISTRAL 0xe81e7ef6
#endif
// A12
#ifndef CPUFAMILY_ARM_VORTEX_TEMPEST
2020-07-04 01:21:30 +08:00
#define CPUFAMILY_ARM_VORTEX_TEMPEST 0x07d34b9f
#endif
// A13
#ifndef CPUFAMILY_ARM_LIGHTNING_THUNDER
#define CPUFAMILY_ARM_LIGHTNING_THUNDER 0x462504d2
#endif
// A14
#ifndef CPUFAMILY_ARM_FIRESTORM_ICESTORM
#define CPUFAMILY_ARM_FIRESTORM_ICESTORM 0x1b588bb3
#endif
// A15
#ifndef CPUFAMILY_ARM_AVALANCHE_BLIZZARD
#define CPUFAMILY_ARM_AVALANCHE_BLIZZARD 0xda33d83d
#endif
// A16
#ifndef CPUFAMILY_ARM_EVEREST_SAWTOOTH
#define CPUFAMILY_ARM_EVEREST_SAWTOOTH 0x8765edea
#endif
// A17 Pro
#ifndef CPUFAMILY_ARM_PCORE_ECORE_COLL
#define CPUFAMILY_ARM_PCORE_ECORE_COLL 0x2876f5b5
#endif
const uint32_t cpu_family = get_sys_info_by_name("hw.cpufamily");
// const uint32_t cpu_type = get_sys_info_by_name("hw.cputype");
// const uint32_t cpu_subtype = get_sys_info_by_name("hw.cpusubtype");
2020-07-04 01:21:30 +08:00
cpuinfo_isa->fp16arith = cpu_family == CPUFAMILY_ARM_MONSOON_MISTRAL ||
cpu_family == CPUFAMILY_ARM_VORTEX_TEMPEST ||
cpu_family == CPUFAMILY_ARM_LIGHTNING_THUNDER ||
cpu_family == CPUFAMILY_ARM_FIRESTORM_ICESTORM ||
cpu_family == CPUFAMILY_ARM_AVALANCHE_BLIZZARD ||
cpu_family == CPUFAMILY_ARM_EVEREST_SAWTOOTH ||
cpu_family == CPUFAMILY_ARM_PCORE_ECORE_COLL;
2020-07-04 01:21:30 +08:00
cpuinfo_isa->dot = cpu_family == CPUFAMILY_ARM_LIGHTNING_THUNDER ||
cpu_family == CPUFAMILY_ARM_FIRESTORM_ICESTORM ||
cpu_family == CPUFAMILY_ARM_AVALANCHE_BLIZZARD ||
cpu_family == CPUFAMILY_ARM_EVEREST_SAWTOOTH ||
cpu_family == CPUFAMILY_ARM_PCORE_ECORE_COLL;
2020-07-04 01:21:30 +08:00
2023-12-27 17:26:44 +08:00
cpuinfo_isa->i8mm = cpu_family == CPUFAMILY_ARM_EVEREST_SAWTOOTH ||
cpu_family == CPUFAMILY_ARM_PCORE_ECORE_COLL;
#endif // iOS
// arm64-osx
#if defined(__APPLE__) && defined(__aarch64__) && !defined(__IOS__)
// Apple M1
#ifndef CPUFAMILY_AARCH64_FIRESTORM_ICESTORM
#define CPUFAMILY_AARCH64_FIRESTORM_ICESTORM 0x1b588bb3
#endif
// Apple M2
#ifndef CPUFAMILY_AARCH64_AVALANCHE_BLIZZARD
#define CPUFAMILY_AARCH64_AVALANCHE_BLIZZARD 0xda33d83d
#endif
const uint32_t cpu_family = get_sys_info_by_name("hw.cpufamily");
cpuinfo_isa->fp16arith = cpu_family == CPUFAMILY_AARCH64_FIRESTORM_ICESTORM ||
cpu_family == CPUFAMILY_AARCH64_AVALANCHE_BLIZZARD;
cpuinfo_isa->dot = cpu_family == CPUFAMILY_AARCH64_FIRESTORM_ICESTORM ||
cpu_family == CPUFAMILY_AARCH64_AVALANCHE_BLIZZARD;
#endif
#ifndef __ANDROID__
#if defined (__linux__) && defined (__aarch64__)
uint32_t isa_features = 0;
isa_features = (uint32_t)getauxval(AT_HWCAP);
if (isa_features & CPUINFO_ARM_LINUX_FEATURE_ASIMDDP) {
cpuinfo_isa->dot = true;
}
const uint32_t fp16arith_mask = CPUINFO_ARM_LINUX_FEATURE_FPHP | CPUINFO_ARM_LINUX_FEATURE_ASIMDHP;
if ((isa_features & fp16arith_mask) == fp16arith_mask) {
cpuinfo_isa->fp16arith = true;
}
2022-11-08 17:05:14 +08:00
if (isa_features & CPUINFO_ARM_LINUX_FEATURE_I8MM) {
2022-10-30 08:44:24 +08:00
cpuinfo_isa->i8mm = true;
}
#endif /* __linux__ && __aarch64__ */
#endif
2023-04-18 18:54:46 +08:00
MNN_PRINT("The device support i8sdot:%d, support fp16:%d, support i8mm: %d\n", cpuinfo_isa->dot, cpuinfo_isa->fp16arith, cpuinfo_isa->i8mm);
}