Enable AES and SHA3 optimisations on Qualcomm Snapdragon X systems

For larger data blocks, AES gets performance gains of up to 26.5%,
and SHA3 up to 16.1%. Tested on a Dell 9345 with Snapdragon X1E-80-100.
Detailed measurements follow.

Before:
type             16 bytes     64 bytes    256 bytes   1024 bytes   8192 bytes  16384 bytes
AES-128-GCM     176805.90k   677599.17k  1932362.15k  4803628.03k  7696804.52k  8036674.22k
AES-192-GCM     178621.88k   673165.14k  1884515.41k  4690465.11k  7669383.17k  8021562.71k
AES-256-GCM     177187.09k   668302.19k  1843664.13k  4405732.35k  6911937.19k  7201800.19k
sha3-224         71694.30k   286854.87k   686716.67k   784932.86k   922643.11k   926182.06k
sha3-256         71997.70k   288208.13k   688777.98k   786555.22k   865192.62k   874643.46k
sha3-384         72167.35k   287085.80k   487257.00k   639260.33k   673180.33k   675108.18k
sha3-512         71581.52k   287953.09k   378381.31k   436353.02k   471433.22k   472804.01k

After:
type             16 bytes     64 bytes    256 bytes   1024 bytes   8192 bytes  16384 bytes
AES-128-GCM     177140.20k   674365.06k  1928108.37k  5285980.84k  9581854.72k 10167205.89k
AES-192-GCM     178512.57k   669584.09k  1890147.75k  5052911.96k  8831027.88k  9337787.73k
AES-256-GCM     177685.05k   664798.83k  1844782.17k  4816555.35k  8176435.20k  8592359.42k
sha3-224         71444.47k   286178.99k   747224.32k   911052.46k  1063985.15k  1067712.51k
sha3-256         71188.32k   285318.74k   746304.34k   903716.52k   984566.44k   997430.61k
sha3-384         71715.30k   285965.27k   546631.59k   733781.67k   766448.98k   768262.14k
sha3-512         70853.85k   284883.33k   427553.71k   489698.99k   520415.91k   520743.59k

Change:
type             16 bytes     64 bytes    256 bytes   1024 bytes   8192 bytes  16384 bytes
AES-128-GCM      +0.2 %       -0.5 %      -0.2 %      +10.0 %      +24.5 %     +26.5 %
AES-192-GCM      -0.1 %       -0.5 %      +0.3 %       +7.7 %      +15.1 %     +16.4 %
AES-256-GCM      +0.3 %       -0.5 %      +0.1 %       +9.3 %      +18.3 %     +19.3 %
sha3-224         -0.3 %       -0.2 %      +8.8 %      +16.1 %      +15.3 %     +15.3 %
sha3-256         -1.1 %       -1.0 %      +8.4 %      +14.9 %      +13.8 %     +14.0 %
sha3-384         -0.6 %       -0.4 %     +12.2 %      +14.8 %      +13.9 %     +13.8 %
sha3-512         -1.0 %       -1.1 %     +13.0 %      +12.2 %      +10.4 %     +10.1 %

Signed-off-by: Aleksander Jan Bajkowski <olek2@wp.pl>

Reviewed-by: Tomas Mraz <tomas@openssl.org>
Reviewed-by: Paul Dale <ppzgs1@gmail.com>
(Merged from https://github.com/openssl/openssl/pull/27503)
This commit is contained in:
Aleksander Jan Bajkowski 2025-04-25 15:30:50 +02:00 committed by Pauli
parent 1eee02d3e7
commit fda4777c14
2 changed files with 6 additions and 1 deletions

View File

@ -102,6 +102,7 @@ extern unsigned int OPENSSL_armv8_rsa_neonized;
# define ARM_CPU_IMP_ARM 0x41
# define HISI_CPU_IMP 0x48
# define ARM_CPU_IMP_QCOMM 0x51
# define ARM_CPU_IMP_APPLE 0x61
# define ARM_CPU_IMP_MICROSOFT 0x6D
# define ARM_CPU_IMP_AMPERE 0xC0
@ -115,6 +116,8 @@ extern unsigned int OPENSSL_armv8_rsa_neonized;
# define ARM_CPU_PART_N3 0xD8E
# define ARM_CPU_PART_V3 0xD84
# define QCOM_CPU_PART_ORYON_X1 0x001
# define APPLE_CPU_PART_M1_ICESTORM 0x022
# define APPLE_CPU_PART_M1_FIRESTORM 0x023
# define APPLE_CPU_PART_M1_ICESTORM_PRO 0x024

View File

@ -416,6 +416,7 @@ void OPENSSL_cpuid_setup(void)
}
if ((MIDR_IS_CPU_MODEL(OPENSSL_arm_midr, ARM_CPU_IMP_ARM, ARM_CPU_PART_V1) ||
MIDR_IS_CPU_MODEL(OPENSSL_arm_midr, ARM_CPU_IMP_ARM, ARM_CPU_PART_N2) ||
MIDR_IS_CPU_MODEL(OPENSSL_arm_midr, ARM_CPU_IMP_QCOMM, QCOM_CPU_PART_ORYON_X1) ||
MIDR_IS_CPU_MODEL(OPENSSL_arm_midr, ARM_CPU_IMP_MICROSOFT, MICROSOFT_CPU_PART_COBALT_100) ||
MIDR_IS_CPU_MODEL(OPENSSL_arm_midr, ARM_CPU_IMP_ARM, ARM_CPU_PART_V2) ||
MIDR_IS_CPU_MODEL(OPENSSL_arm_midr, ARM_CPU_IMP_ARM, ARM_CPU_PART_N3) ||
@ -440,7 +441,8 @@ void OPENSSL_cpuid_setup(void)
MIDR_IS_CPU_MODEL(OPENSSL_arm_midr, ARM_CPU_IMP_APPLE, APPLE_CPU_PART_M2_AVALANCHE_PRO) ||
MIDR_IS_CPU_MODEL(OPENSSL_arm_midr, ARM_CPU_IMP_APPLE, APPLE_CPU_PART_M2_BLIZZARD_PRO) ||
MIDR_IS_CPU_MODEL(OPENSSL_arm_midr, ARM_CPU_IMP_APPLE, APPLE_CPU_PART_M2_AVALANCHE_MAX) ||
MIDR_IS_CPU_MODEL(OPENSSL_arm_midr, ARM_CPU_IMP_APPLE, APPLE_CPU_PART_M2_BLIZZARD_MAX)) &&
MIDR_IS_CPU_MODEL(OPENSSL_arm_midr, ARM_CPU_IMP_APPLE, APPLE_CPU_PART_M2_BLIZZARD_MAX) ||
MIDR_IS_CPU_MODEL(OPENSSL_arm_midr, ARM_CPU_IMP_QCOMM, QCOM_CPU_PART_ORYON_X1)) &&
(OPENSSL_armcap_P & ARMV8_SHA3))
OPENSSL_armcap_P |= ARMV8_HAVE_SHA3_AND_WORTH_USING;
# endif