Fix `VC-WIN64-CLANGASM-ARM` target

This was broken for Windows ARM64 in #22949

The comments were in the wrong format, and a define check of `_M_ARM64` was missed in places
This commit is contained in:
Anthony Roberts 2025-07-03 11:21:37 +01:00
parent ac6178c3f1
commit 2775bcb423
6 changed files with 261 additions and 253 deletions

View File

@ -32,7 +32,7 @@ open OUT,"| \"$^X\" $xlate $flavour \"$output\""
$code=<<___;
#include "arm_arch.h"
# Theses are offsets into the CIPH_DIGEST struct
// These are offsets into the CIPH_DIGEST struct
#define CIPHER_KEY 0
#define CIPHER_KEY_ROUNDS 8
#define CIPHER_IV 16
@ -149,68 +149,70 @@ ___
}
$code.=<<___;
# Description:
#
# Combined Enc/Auth Primitive = aes128cbc/sha1_hmac
#
# Operations:
#
# out = encrypt-AES128CBC(in)
# return_hash_ptr = SHA1(o_key_pad | SHA1(i_key_pad | out))
#
# Prototype:
# int asm_aescbc_sha1_hmac(uint8_t *csrc, uint8_t *cdst, uint64_t clen,
# uint8_t *dsrc, uint8_t *ddst, uint64_t dlen,
# CIPH_DIGEST *arg)
#
# Registers used:
#
# asm_aescbc_sha1_hmac(
# csrc, x0 (cipher src address)
# cdst, x1 (cipher dst address)
# clen x2 (cipher length)
# dsrc, x3 (digest src address)
# ddst, x4 (digest dst address)
# dlen, x5 (digest length)
# arg x6:
# arg->cipher.key (round keys)
# arg->cipher.key_rounds (key rounds)
# arg->cipher.iv (initialization vector)
# arg->digest.hmac.i_key_pad (partially hashed i_key_pad)
# arg->digest.hmac.o_key_pad (partially hashed o_key_pad)
# )
#
# Routine register definitions:
#
# v0 - v3 -- aes results
# v4 - v7 -- round consts for sha
# v8 - v18 -- round keys
# v19 -- temp register for SHA1
# v20 -- ABCD copy (q20)
# v21 -- sha working state (q21)
# v22 -- sha working state (q22)
# v23 -- temp register for SHA1
# v24 -- sha state ABCD
# v25 -- sha state E
# v26 -- sha block 0
# v27 -- sha block 1
# v28 -- sha block 2
# v29 -- sha block 3
# v30 -- reserved
# v31 -- reserved
#
# Constraints:
#
# The variable "clen" must be a multiple of 16, otherwise results are not
# defined. For AES partial blocks the user is required to pad the input
# to modulus 16 = 0.
# The variable "dlen" must be a multiple of 8 and greater or equal
# to "clen". This constraint is strictly related to the needs of the IPSec
# ESP packet. Encrypted payload is hashed along with the 8 byte ESP header,
# forming ICV. Speed gain is achieved by doing both things at the same time,
# hence lengths are required to match at least at the cipher level.
#
# Short lengths are not optimized at < 12 AES blocks
/*
* Description:
*
* Combined Enc/Auth Primitive = aes128cbc/sha1_hmac
*
* Operations:
*
* out = encrypt-AES128CBC(in)
* return_hash_ptr = SHA1(o_key_pad | SHA1(i_key_pad | out))
*
* Prototype:
* int asm_aescbc_sha1_hmac(uint8_t *csrc, uint8_t *cdst, uint64_t clen,
* uint8_t *dsrc, uint8_t *ddst, uint64_t dlen,
* CIPH_DIGEST *arg)
*
* Registers used:
*
* asm_aescbc_sha1_hmac(
* csrc, x0 (cipher src address)
* cdst, x1 (cipher dst address)
* clen x2 (cipher length)
* dsrc, x3 (digest src address)
* ddst, x4 (digest dst address)
* dlen, x5 (digest length)
* arg x6:
* arg->cipher.key (round keys)
* arg->cipher.key_rounds (key rounds)
* arg->cipher.iv (initialization vector)
* arg->digest.hmac.i_key_pad (partially hashed i_key_pad)
* arg->digest.hmac.o_key_pad (partially hashed o_key_pad)
* )
*
* Routine register definitions:
*
* v0 - v3 -- aes results
* v4 - v7 -- round consts for sha
* v8 - v18 -- round keys
* v19 -- temp register for SHA1
* v20 -- ABCD copy (q20)
* v21 -- sha working state (q21)
* v22 -- sha working state (q22)
* v23 -- temp register for SHA1
* v24 -- sha state ABCD
* v25 -- sha state E
* v26 -- sha block 0
* v27 -- sha block 1
* v28 -- sha block 2
* v29 -- sha block 3
* v30 -- reserved
* v31 -- reserved
*
* Constraints:
*
* The variable "clen" must be a multiple of 16, otherwise results are not
* defined. For AES partial blocks the user is required to pad the input
* to modulus 16 = 0.
* The variable "dlen" must be a multiple of 8 and greater or equal
* to "clen". This constraint is strictly related to the needs of the IPSec
* ESP packet. Encrypted payload is hashed along with the 8 byte ESP header,
* forming ICV. Speed gain is achieved by doing both things at the same time,
* hence lengths are required to match at least at the cipher level.
*
* Short lengths are not optimized at < 12 AES blocks
*/
.global asm_aescbc_sha1_hmac
.type asm_aescbc_sha1_hmac,%function
@ -2322,68 +2324,70 @@ $code.=<<___;
.size asm_aescbc_sha1_hmac, .-asm_aescbc_sha1_hmac
# Description:
#
# Combined Auth/Dec Primitive = sha1_hmac/aes128cbc
#
# Operations:
#
# out = decrypt-AES128CBC(in)
# return_ash_ptr = SHA1(o_key_pad | SHA1(i_key_pad | in))
#
# Prototype:
# asm_sha1_hmac_aescbc_dec(uint8_t *csrc, uint8_t *cdst, uint64_t clen,
# uint8_t *dsrc, uint8_t *ddst, uint64_t dlen,
# CIPH_DIGEST *arg)
#
# Registers used:
#
# asm_sha1_hmac_aescbc_dec(
# csrc, x0 (cipher src address)
# cdst, x1 (cipher dst address)
# clen x2 (cipher length)
# dsrc, x3 (digest src address)
# ddst, x4 (digest dst address)
# dlen, x5 (digest length)
# arg x6 :
# arg->cipher.key (round keys)
# arg->cipher.key_rounds (key rounds)
# arg->cipher.iv (initialization vector)
# arg->digest.hmac.i_key_pad (partially hashed i_key_pad)
# arg->digest.hmac.o_key_pad (partially hashed o_key_pad)
#
#
# Routine register definitions:
#
# v0 - v3 -- aes results
# v4 - v7 -- round consts for sha
# v8 - v18 -- round keys
# v19 -- temp register for SHA1
# v20 -- ABCD copy (q20)
# v21 -- sha working state (q21)
# v22 -- sha working state (q22)
# v23 -- temp register for SHA1
# v24 -- sha state ABCD
# v25 -- sha state E
# v26 -- sha block 0
# v27 -- sha block 1
# v28 -- sha block 2
# v29 -- sha block 3
# v30 -- reserved
# v31 -- reserved
#
#
# Constraints:
#
# The variable "clen" must be a multiple of 16, otherwise results are not
# defined. For AES partial blocks the user is required to pad the input
# to modulus 16 = 0.
#
# The variable "dlen" must be a multiple of 8 and greater or equal to "clen".
# The maximum difference between "dlen" and "clen" cannot exceed 64 bytes.
# This constrain is strictly related to the needs of the IPSec ESP packet.
# Short lengths are less optimized at < 16 AES blocks, however they are
# somewhat optimized, and more so than the enc/auth versions.
/*
* Description:
*
* Combined Auth/Dec Primitive = sha1_hmac/aes128cbc
*
* Operations:
*
* out = decrypt-AES128CBC(in)
* return_hash_ptr = SHA1(o_key_pad | SHA1(i_key_pad | in))
*
* Prototype:
* asm_sha1_hmac_aescbc_dec(uint8_t *csrc, uint8_t *cdst, uint64_t clen,
* uint8_t *dsrc, uint8_t *ddst, uint64_t dlen,
* CIPH_DIGEST *arg)
*
* Registers used:
*
* asm_sha1_hmac_aescbc_dec(
* csrc, x0 (cipher src address)
* cdst, x1 (cipher dst address)
* clen x2 (cipher length)
* dsrc, x3 (digest src address)
* ddst, x4 (digest dst address)
* dlen, x5 (digest length)
* arg x6 :
* arg->cipher.key (round keys)
* arg->cipher.key_rounds (key rounds)
* arg->cipher.iv (initialization vector)
* arg->digest.hmac.i_key_pad (partially hashed i_key_pad)
* arg->digest.hmac.o_key_pad (partially hashed o_key_pad)
*
*
* Routine register definitions:
*
* v0 - v3 -- aes results
* v4 - v7 -- round consts for sha
* v8 - v18 -- round keys
* v19 -- temp register for SHA1
* v20 -- ABCD copy (q20)
* v21 -- sha working state (q21)
* v22 -- sha working state (q22)
* v23 -- temp register for SHA1
* v24 -- sha state ABCD
* v25 -- sha state E
* v26 -- sha block 0
* v27 -- sha block 1
* v28 -- sha block 2
* v29 -- sha block 3
* v30 -- reserved
* v31 -- reserved
*
*
* Constraints:
*
* The variable "clen" must be a multiple of 16, otherwise results are not
* defined. For AES partial blocks the user is required to pad the input
* to modulus 16 = 0.
*
* The variable "dlen" must be a multiple of 8 and greater or equal to "clen".
* The maximum difference between "dlen" and "clen" cannot exceed 64 bytes.
* This constraint is strictly related to the needs of the IPSec ESP packet.
* Short lengths are less optimized at < 16 AES blocks, however they are
* somewhat optimized, and more so than the enc/auth versions.
*/
.global asm_sha1_hmac_aescbc_dec
.type asm_sha1_hmac_aescbc_dec,%function

View File

@ -32,7 +32,7 @@ open OUT,"| \"$^X\" $xlate $flavour \"$output\""
$code=<<___;
#include "arm_arch.h"
# Theses are offsets into the CIPH_DIGEST struct
// These are offsets into the CIPH_DIGEST struct
#define CIPHER_KEY 0
#define CIPHER_KEY_ROUNDS 8
#define CIPHER_IV 16
@ -149,67 +149,69 @@ ___
}
$code.=<<___;
# Description:
#
# Combined Enc/Auth Primitive = aes128cbc/sha256_hmac
#
# Operations:
#
# out = encrypt-AES128CBC(in)
# return_hash_ptr = SHA256(o_key_pad | SHA256(i_key_pad | out))
#
# Prototype:
# void asm_aescbc_sha256_hmac(uint8_t *csrc, uint8_t *cdst, uint64_t clen,
# uint8_t *dsrc, uint8_t *ddst, uint64_t dlen,
# CIPH_DIGEST *arg)
#
# Registers used:
#
# asm_aescbc_sha256_hmac(
# csrc, x0 (cipher src address)
# cdst, x1 (cipher dst address)
# clen x2 (cipher length)
# dsrc, x3 (digest src address)
# ddst, x4 (digest dst address)
# dlen, x5 (digest length)
# arg x6 :
# arg->cipher.key (round keys)
# arg->cipher.key_rounds (key rounds)
# arg->cipher.iv (initialization vector)
# arg->digest.hmac.i_key_pad (partially hashed i_key_pad)
# arg->digest.hmac.o_key_pad (partially hashed o_key_pad)
# )
#
# Routine register definitions:
#
# v0 -- v3 -- aes results
# v4 -- v7 -- round consts for sha
# v8 -- v18 -- round keys
# v19 -- v20 -- round keys
# v21 -- ABCD tmp
# v22 -- sha working state ABCD (q22)
# v23 -- sha working state EFGH (q23)
# v24 -- sha state ABCD
# v25 -- sha state EFGH
# v26 -- sha block 0
# v27 -- sha block 1
# v28 -- sha block 2
# v29 -- sha block 3
# v30 -- reserved
# v31 -- reserved
#
# Constraints:
#
# The variable "clen" must be a multiple of 16, otherwise results
# are not defined. For AES partial blocks the user is required
# to pad the input to modulus 16 = 0.
# The variable "dlen" must be a multiple of 8 and greater or equal
# to "clen". This constrain is strictly related to the needs of the IPSec
# ESP packet. Encrypted payload is hashed along with the 8 byte ESP header,
# forming ICV. Speed gain is achieved by doing both things at the same time,
# hence lengths are required to match at least at the cipher level.
#
# Short lengths are not optimized at < 12 AES blocks
/*
* Description:
*
* Combined Enc/Auth Primitive = aes128cbc/sha256_hmac
*
* Operations:
*
* out = encrypt-AES128CBC(in)
* return_hash_ptr = SHA256(o_key_pad | SHA256(i_key_pad | out))
*
* Prototype:
* void asm_aescbc_sha256_hmac(uint8_t *csrc, uint8_t *cdst, uint64_t clen,
* uint8_t *dsrc, uint8_t *ddst, uint64_t dlen,
* CIPH_DIGEST *arg)
*
* Registers used:
*
* asm_aescbc_sha256_hmac(
* csrc, x0 (cipher src address)
* cdst, x1 (cipher dst address)
* clen x2 (cipher length)
* dsrc, x3 (digest src address)
* ddst, x4 (digest dst address)
* dlen, x5 (digest length)
* arg x6 :
* arg->cipher.key (round keys)
* arg->cipher.key_rounds (key rounds)
* arg->cipher.iv (initialization vector)
* arg->digest.hmac.i_key_pad (partially hashed i_key_pad)
* arg->digest.hmac.o_key_pad (partially hashed o_key_pad)
* )
*
* Routine register definitions:
*
* v0 -- v3 -- aes results
* v4 -- v7 -- round consts for sha
* v8 -- v18 -- round keys
* v19 -- v20 -- round keys
* v21 -- ABCD tmp
* v22 -- sha working state ABCD (q22)
* v23 -- sha working state EFGH (q23)
* v24 -- sha state ABCD
* v25 -- sha state EFGH
* v26 -- sha block 0
* v27 -- sha block 1
* v28 -- sha block 2
* v29 -- sha block 3
* v30 -- reserved
* v31 -- reserved
*
* Constraints:
*
* The variable "clen" must be a multiple of 16, otherwise results
* are not defined. For AES partial blocks the user is required
* to pad the input to modulus 16 = 0.
* The variable "dlen" must be a multiple of 8 and greater or equal
* to "clen". This constraint is strictly related to the needs of the IPSec
* ESP packet. Encrypted payload is hashed along with the 8 byte ESP header,
* forming ICV. Speed gain is achieved by doing both things at the same time,
* hence lengths are required to match at least at the cipher level.
*
* Short lengths are not optimized at < 12 AES blocks
*/
.global asm_aescbc_sha256_hmac
.type asm_aescbc_sha256_hmac,%function
@ -2472,68 +2474,70 @@ $code.=<<___;
.size asm_aescbc_sha256_hmac, .-asm_aescbc_sha256_hmac
# Description:
#
# Combined Auth/Dec Primitive = sha256_hmac/aes128cbc
#
# Operations:
#
# out = decrypt-AES128CBC(in)
# return_ash_ptr = SHA256(o_key_pad | SHA256(i_key_pad | in))
#
# Prototype:
#
# void asm_sha256_hmac_aescbc_dec(uint8_t *csrc, uint8_t *cdst, uint64_t clen,
# uint8_t *dsrc, uint8_t *ddst, uint64_t dlen,
# CIPH_DIGEST *arg)
#
# Registers used:
#
# asm_sha256_hmac_aescbc_dec(
# csrc, x0 (cipher src address)
# cdst, x1 (cipher dst address)
# clen x2 (cipher length)
# dsrc, x3 (digest src address)
# ddst, x4 (digest dst address)
# dlen, x5 (digest length)
# arg x6:
# arg->cipher.key (round keys)
# arg->cipher.key_rounds (key rounds)
# arg->cipher.iv (initialization vector)
# arg->digest.hmac.i_key_pad (partially hashed i_key_pad)
# arg->digest.hmac.o_key_pad (partially hashed o_key_pad)
# )
#
# Routine register definitions:
#
# v0 - v3 -- aes results
# v4 - v7 -- round consts for sha
# v8 - v18 -- round keys
# v19 - v20 -- round keys
# v21 -- ABCD tmp
# v22 -- sha working state ABCD (q22)
# v23 -- sha working state EFGH (q23)
# v24 -- sha state ABCD
# v25 -- sha state EFGH
# v26 -- sha block 0
# v27 -- sha block 1
# v28 -- sha block 2
# v29 -- sha block 3
# v30 -- reserved
# v31 -- reserved
#
#
# Constraints:
#
# The variable "clen" must be a multiple of 16, otherwise results are not
# defined For AES partial blocks the user is required to pad the input to
# modulus 16 = 0.
#
# The variable "dlen" must be a multiple of 8 and greater or equal to "clen".
# The maximum difference between "dlen" and "clen" cannot exceed 64 bytes.
# This constrain is strictly related to the needs of the IPSec ESP packet.
# Short lengths are less optimized at < 16 AES blocks, however they are
# somewhat optimized, and more so than the enc/auth versions.
/*
* Description:
*
* Combined Auth/Dec Primitive = sha256_hmac/aes128cbc
*
* Operations:
*
* out = decrypt-AES128CBC(in)
* return_hash_ptr = SHA256(o_key_pad | SHA256(i_key_pad | in))
*
* Prototype:
*
* void asm_sha256_hmac_aescbc_dec(uint8_t *csrc, uint8_t *cdst, uint64_t clen,
* uint8_t *dsrc, uint8_t *ddst, uint64_t dlen,
* CIPH_DIGEST *arg)
*
* Registers used:
*
* asm_sha256_hmac_aescbc_dec(
* csrc, x0 (cipher src address)
* cdst, x1 (cipher dst address)
* clen x2 (cipher length)
* dsrc, x3 (digest src address)
* ddst, x4 (digest dst address)
* dlen, x5 (digest length)
* arg x6:
* arg->cipher.key (round keys)
* arg->cipher.key_rounds (key rounds)
* arg->cipher.iv (initialization vector)
* arg->digest.hmac.i_key_pad (partially hashed i_key_pad)
* arg->digest.hmac.o_key_pad (partially hashed o_key_pad)
* )
*
* Routine register definitions:
*
* v0 - v3 -- aes results
* v4 - v7 -- round consts for sha
* v8 - v18 -- round keys
* v19 - v20 -- round keys
* v21 -- ABCD tmp
* v22 -- sha working state ABCD (q22)
* v23 -- sha working state EFGH (q23)
* v24 -- sha state ABCD
* v25 -- sha state EFGH
* v26 -- sha block 0
* v27 -- sha block 1
* v28 -- sha block 2
* v29 -- sha block 3
* v30 -- reserved
* v31 -- reserved
*
*
* Constraints:
*
* The variable "clen" must be a multiple of 16, otherwise results are not
* defined. For AES partial blocks the user is required to pad the input to
* modulus 16 = 0.
*
* The variable "dlen" must be a multiple of 8 and greater or equal to "clen".
* The maximum difference between "dlen" and "clen" cannot exceed 64 bytes.
* This constraint is strictly related to the needs of the IPSec ESP packet.
* Short lengths are less optimized at < 16 AES blocks, however they are
* somewhat optimized, and more so than the enc/auth versions.
*/
.global asm_sha256_hmac_aescbc_dec
.type asm_sha256_hmac_aescbc_dec,%function

View File

@ -24,7 +24,7 @@ open OUT,"| \"$^X\" $xlate $flavour \"$output\""
$code=<<___;
#include "arm_arch.h"
# Theses are offsets into the CIPH_DIGEST struct
// These are offsets into the CIPH_DIGEST struct
#define CIPHER_KEY 0
#define CIPHER_KEY_ROUNDS 8
#define CIPHER_IV 16

View File

@ -22,7 +22,7 @@ const PROV_CIPHER_HW_AES_HMAC_SHA_ETM *ossl_prov_cipher_hw_aes_cbc_hmac_sha1_etm
#else
void sha1_block_data_order(void *c, const void *p, size_t len);
# if defined(__aarch64__)
# if defined(__aarch64__) || defined(_M_ARM64)
int asm_aescbc_sha1_hmac(const uint8_t *csrc, uint8_t *cdst, uint64_t clen,
uint8_t *dsrc, uint8_t *ddst, uint64_t dlen,
CIPH_DIGEST *arg);

View File

@ -22,7 +22,7 @@ const PROV_CIPHER_HW_AES_HMAC_SHA_ETM *ossl_prov_cipher_hw_aes_cbc_hmac_sha256_e
#else
void sha256_block_data_order(void *c, const void *p, size_t len);
# if defined(__aarch64__)
# if defined(__aarch64__) || defined(_M_ARM64)
int asm_aescbc_sha256_hmac(const uint8_t *csrc, uint8_t *cdst, uint64_t clen,
uint8_t *dsrc, uint8_t *ddst, uint64_t dlen,
CIPH_DIGEST *arg);

View File

@ -26,7 +26,7 @@ const PROV_CIPHER_HW_AES_HMAC_SHA_ETM *ossl_prov_cipher_hw_aes_cbc_hmac_sha512_e
return NULL;
}
#else
# if defined(__aarch64__)
# if defined(__aarch64__) || defined(_M_ARM64)
void asm_aescbc_sha512_hmac(const uint8_t *csrc, uint8_t *cdst, uint64_t clen,
uint8_t *dsrc, uint8_t *ddst, uint64_t dlen,
CIPH_DIGEST *arg);