mirror of https://github.com/openssl/openssl.git
Optimize AES-CTR for ARM Neoverse V1 and V2.
Unroll the AES-CTR loops to a maximum of 12 blocks for ARM Neoverse V1 and V2,
to fully utilize their AES pipeline resources.

Improvement on ARM Neoverse V1:

Package Size (Bytes)    16     32     64    128    256   1024
Improvement (%)       3.93  -0.45  11.30   4.31  12.48  37.66

Package Size (Bytes)  1500   8192  16384  61440  65536
Improvement (%)      37.16  38.90  39.89  40.55  40.41

Change-Id: Ifb8fad9af22476259b9ba75132bc3d8010a7fdbd

Reviewed-by: Tom Cosgrove <tom.cosgrove@arm.com>
Reviewed-by: Tomas Mraz <tomas@openssl.org>
(Merged from https://github.com/openssl/openssl/pull/22733)
commit cc82b09cbd (parent c8fe4b5948)
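The gain comes from keeping many independent AES rounds in flight at once: each CTR block's AESE/AESMC chain is serial, but twelve counter blocks have no data dependencies between them, so interleaving them lets the Neoverse V1/V2 AES pipes run in parallel instead of stalling on round-to-round latency. Below is a minimal, illustrative C sketch of that batching using the ACLE NEON AES intrinsics rather than the committed perlasm; the helper name, the rk[0..nr] key-schedule layout and the caller-prepared counter blocks are assumptions of the example, and it must be built with the AES extension enabled (e.g. -march=armv8-a+crypto).

    #include <arm_neon.h>

    #define BLOCKS 12   /* same unroll factor as the new assembly */

    /* Illustrative only: encrypt BLOCKS independent counter blocks.
     * rk[0..nr] is an expanded AES key schedule (nr = 10/12/14),
     * ctr[] holds the pre-incremented big-endian counter blocks. */
    static void ctr_encrypt_12(uint8_t out[BLOCKS][16],
                               const uint8_t in[BLOCKS][16],
                               const uint8x16_t *rk, int nr,
                               const uint8_t ctr[BLOCKS][16])
    {
        uint8x16_t s[BLOCKS];
        int i, r;

        for (i = 0; i < BLOCKS; i++)
            s[i] = vld1q_u8(ctr[i]);

        /* nr - 1 full rounds: AESE (AddRoundKey+SubBytes+ShiftRows) + AESMC.
         * The inner-loop bodies are independent, so the core can overlap them. */
        for (r = 0; r < nr - 1; r++)
            for (i = 0; i < BLOCKS; i++)
                s[i] = vaesmcq_u8(vaeseq_u8(s[i], rk[r]));

        /* final round has no MixColumns; XOR in the last round key,
         * then XOR the keystream into the plaintext */
        for (i = 0; i < BLOCKS; i++) {
            s[i] = veorq_u8(vaeseq_u8(s[i], rk[nr - 1]), rk[nr]);
            vst1q_u8(out[i], veorq_u8(s[i], vld1q_u8(in[i])));
        }
    }

In the committed assembly the last two XORs are additionally fused into a single EOR3 (three-way XOR from the SHA3 extension), which is why the routine is only selected when the SHA3 feature bit is present alongside AES, as the armcap change below shows.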
@@ -1751,6 +1751,747 @@ $code.=<<___;
 .size ${prefix}_cbc_encrypt,.-${prefix}_cbc_encrypt
 ___
 }}}
+
+{{{
+my ($inp,$out,$len,$key,$ivp)=map("x$_",(0..4));
+my ($rounds,$roundsx,$cnt,$key_)=("w5","x5","w6","x7");
+my ($ctr,$tctr0,$tctr1,$tctr2)=map("w$_",(8..10,12));
+my ($tctr3,$tctr4,$tctr5,$tctr6)=map("w$_",(11,13..15));
+my ($tctr7,$tctr8,$tctr9,$tctr10,$tctr11)=map("w$_",(19..23));
+
+# q0-q7 => v0-v7; q8-q23 => v16-v31; q24-q31 => v8-v15
+my ($ivec,$rndlast,$rndping,$rndpang)=map("q$_",(0..3));
+my ($in0,$in1,$in2,$in3,$in4,$in5)=map("q$_",(4..9));
+my ($in6,$in7,$in8,$in9,$in10,$in11)=map("q$_",(10..15));
+my ($dat0,$dat1,$dat2,$dat3,$dat4,$dat5)=map("q$_",(16..21));
+my ($dat6,$dat7,$dat8,$dat9,$dat10,$dat11)=map("q$_",(22..27));
+my ($tmp0,$tmp1,$tmp2)=map("q$_",(25..27));
+
+#q_X => qX, for ldp & stp
+my ($in0q,$in1q,$in2q,$in3q)=map("q_$_",(4..7));
+my ($in4q,$in5q,$in6q,$in7q,$in8q,$in9q,$in10q,$in11q)=map("q_$_",(16..23));
+
+my ($dat8d,$dat9d,$dat10d,$dat11d)=map("d$_",(8..11));
+
+$code.=<<___ if ($flavour =~ /64/);
+.globl ${prefix}_ctr32_encrypt_blocks_unroll12_eor3
+.type ${prefix}_ctr32_encrypt_blocks_unroll12_eor3,%function
+.align 5
+${prefix}_ctr32_encrypt_blocks_unroll12_eor3:
+AARCH64_VALID_CALL_TARGET
+// Armv8.3-A PAuth: even though x30 is pushed to stack it is not popped later.
+stp x29,x30,[sp,#-16]!
+add x29,sp,#0
+
+ldr $rounds,[$key,#240]
+
+ldr $ctr, [$ivp, #12]
+#ifdef __AARCH64EB__
+vld1.8 {$dat0},[$ivp]
+#else
+vld1.32 {$dat0},[$ivp]
+#endif
+vld1.32 {$rndping-$rndpang},[$key] // load key schedule...
+sub $rounds,$rounds,#4
+cmp $len,#2
+add $key_,$key,$roundsx,lsl#4 // pointer to last round key
+sub $rounds,$rounds,#2
+add $key_, $key_, #64
+vld1.32 {$rndlast},[$key_]
+add $key_,$key,#32
+mov $cnt,$rounds
+#ifndef __AARCH64EB__
+rev $ctr, $ctr
+#endif
+
+vorr $dat1,$dat0,$dat0
+add $tctr1, $ctr, #1
+vorr $dat2,$dat0,$dat0
+add $ctr, $ctr, #2
+vorr $ivec,$dat0,$dat0
+rev $tctr1, $tctr1
+vmov.32 ${dat1}[3],$tctr1
+b.ls .Lctr32_tail_unroll
+cmp $len,#6
+rev $tctr2, $ctr
+sub $len,$len,#3 // bias
+vmov.32 ${dat2}[3],$tctr2
+b.lo .Loop3x_ctr32_unroll
+cmp $len,#9
+vorr $dat3,$dat0,$dat0
+add $tctr3, $ctr, #1
+vorr $dat4,$dat0,$dat0
+add $tctr4, $ctr, #2
+rev $tctr3, $tctr3
+vorr $dat5,$dat0,$dat0
+add $ctr, $ctr, #3
+rev $tctr4, $tctr4
+vmov.32 ${dat3}[3],$tctr3
+rev $tctr5, $ctr
+vmov.32 ${dat4}[3],$tctr4
+vmov.32 ${dat5}[3],$tctr5
+sub $len,$len,#3
+b.lo .Loop6x_ctr32_unroll
+
+// push regs to stack when 12 data chunks are interleaved
+stp x19,x20,[sp,#-16]!
+stp x21,x22,[sp,#-16]!
+stp x23,x24,[sp,#-16]!
+stp $dat8d,$dat9d,[sp,#-32]!
+stp $dat10d,$dat11d,[sp,#-32]!
+
+add $tctr6,$ctr,#1
+add $tctr7,$ctr,#2
+add $tctr8,$ctr,#3
+add $tctr9,$ctr,#4
+add $tctr10,$ctr,#5
+add $ctr,$ctr,#6
+vorr $dat6,$dat0,$dat0
+rev $tctr6,$tctr6
+vorr $dat7,$dat0,$dat0
+rev $tctr7,$tctr7
+vorr $dat8,$dat0,$dat0
+rev $tctr8,$tctr8
+vorr $dat9,$dat0,$dat0
+rev $tctr9,$tctr9
+vorr $dat10,$dat0,$dat0
+rev $tctr10,$tctr10
+vorr $dat11,$dat0,$dat0
+rev $tctr11,$ctr
+
+sub $len,$len,#6 // bias
+vmov.32 ${dat6}[3],$tctr6
+vmov.32 ${dat7}[3],$tctr7
+vmov.32 ${dat8}[3],$tctr8
+vmov.32 ${dat9}[3],$tctr9
+vmov.32 ${dat10}[3],$tctr10
+vmov.32 ${dat11}[3],$tctr11
+b .Loop12x_ctr32_unroll
+
+.align 4
+.Loop12x_ctr32_unroll:
+aese $dat0,$rndping
+aesmc $dat0,$dat0
+aese $dat1,$rndping
+aesmc $dat1,$dat1
+aese $dat2,$rndping
+aesmc $dat2,$dat2
+aese $dat3,$rndping
+aesmc $dat3,$dat3
+aese $dat4,$rndping
+aesmc $dat4,$dat4
+aese $dat5,$rndping
+aesmc $dat5,$dat5
+aese $dat6,$rndping
+aesmc $dat6,$dat6
+aese $dat7,$rndping
+aesmc $dat7,$dat7
+aese $dat8,$rndping
+aesmc $dat8,$dat8
+aese $dat9,$rndping
+aesmc $dat9,$dat9
+aese $dat10,$rndping
+aesmc $dat10,$dat10
+aese $dat11,$rndping
+aesmc $dat11,$dat11
+vld1.32 {$rndping},[$key_],#16
+subs $cnt,$cnt,#2
+aese $dat0,$rndpang
+aesmc $dat0,$dat0
+aese $dat1,$rndpang
+aesmc $dat1,$dat1
+aese $dat2,$rndpang
+aesmc $dat2,$dat2
+aese $dat3,$rndpang
+aesmc $dat3,$dat3
+aese $dat4,$rndpang
+aesmc $dat4,$dat4
+aese $dat5,$rndpang
+aesmc $dat5,$dat5
+aese $dat6,$rndpang
+aesmc $dat6,$dat6
+aese $dat7,$rndpang
+aesmc $dat7,$dat7
+aese $dat8,$rndpang
+aesmc $dat8,$dat8
+aese $dat9,$rndpang
+aesmc $dat9,$dat9
+aese $dat10,$rndpang
+aesmc $dat10,$dat10
+aese $dat11,$rndpang
+aesmc $dat11,$dat11
+vld1.32 {$rndpang},[$key_],#16
+b.gt .Loop12x_ctr32_unroll
+
+aese $dat0,$rndping
+aesmc $dat0,$dat0
+aese $dat1,$rndping
+aesmc $dat1,$dat1
+aese $dat2,$rndping
+aesmc $dat2,$dat2
+aese $dat3,$rndping
+aesmc $dat3,$dat3
+aese $dat4,$rndping
+aesmc $dat4,$dat4
+aese $dat5,$rndping
+aesmc $dat5,$dat5
+aese $dat6,$rndping
+aesmc $dat6,$dat6
+aese $dat7,$rndping
+aesmc $dat7,$dat7
+aese $dat8,$rndping
+aesmc $dat8,$dat8
+aese $dat9,$rndping
+aesmc $dat9,$dat9
+aese $dat10,$rndping
+aesmc $dat10,$dat10
+aese $dat11,$rndping
+aesmc $dat11,$dat11
+vld1.32 {$rndping},[$key_],#16
+
+aese $dat0,$rndpang
+aesmc $dat0,$dat0
+aese $dat1,$rndpang
+aesmc $dat1,$dat1
+aese $dat2,$rndpang
+aesmc $dat2,$dat2
+aese $dat3,$rndpang
+aesmc $dat3,$dat3
+aese $dat4,$rndpang
+aesmc $dat4,$dat4
+aese $dat5,$rndpang
+aesmc $dat5,$dat5
+aese $dat6,$rndpang
+aesmc $dat6,$dat6
+aese $dat7,$rndpang
+aesmc $dat7,$dat7
+aese $dat8,$rndpang
+aesmc $dat8,$dat8
+aese $dat9,$rndpang
+aesmc $dat9,$dat9
+aese $dat10,$rndpang
+aesmc $dat10,$dat10
+aese $dat11,$rndpang
+aesmc $dat11,$dat11
+vld1.32 {$rndpang},[$key_],#16
+
+aese $dat0,$rndping
+aesmc $dat0,$dat0
+add $tctr0,$ctr,#1
+add $tctr1,$ctr,#2
+aese $dat1,$rndping
+aesmc $dat1,$dat1
+add $tctr2,$ctr,#3
+add $tctr3,$ctr,#4
+aese $dat2,$rndping
+aesmc $dat2,$dat2
+add $tctr4,$ctr,#5
+add $tctr5,$ctr,#6
+rev $tctr0,$tctr0
+aese $dat3,$rndping
+aesmc $dat3,$dat3
+add $tctr6,$ctr,#7
+add $tctr7,$ctr,#8
+rev $tctr1,$tctr1
+rev $tctr2,$tctr2
+aese $dat4,$rndping
+aesmc $dat4,$dat4
+add $tctr8,$ctr,#9
+add $tctr9,$ctr,#10
+rev $tctr3,$tctr3
+rev $tctr4,$tctr4
+aese $dat5,$rndping
+aesmc $dat5,$dat5
+add $tctr10,$ctr,#11
+add $tctr11,$ctr,#12
+rev $tctr5,$tctr5
+rev $tctr6,$tctr6
+aese $dat6,$rndping
+aesmc $dat6,$dat6
+rev $tctr7,$tctr7
+rev $tctr8,$tctr8
+aese $dat7,$rndping
+aesmc $dat7,$dat7
+rev $tctr9,$tctr9
+rev $tctr10,$tctr10
+aese $dat8,$rndping
+aesmc $dat8,$dat8
+rev $tctr11,$tctr11
+aese $dat9,$rndping
+aesmc $dat9,$dat9
+aese $dat10,$rndping
+aesmc $dat10,$dat10
+aese $dat11,$rndping
+aesmc $dat11,$dat11
+vld1.32 {$rndping},[$key_],#16
+
+aese $dat0,$rndpang
+aesmc $dat0,$dat0
+aese $dat1,$rndpang
+aesmc $dat1,$dat1
+aese $dat2,$rndpang
+aesmc $dat2,$dat2
+aese $dat3,$rndpang
+aesmc $dat3,$dat3
+vld1.8 {$in0,$in1,$in2,$in3},[$inp],#64
+aese $dat4,$rndpang
+aesmc $dat4,$dat4
+aese $dat5,$rndpang
+aesmc $dat5,$dat5
+aese $dat6,$rndpang
+aesmc $dat6,$dat6
+aese $dat7,$rndpang
+aesmc $dat7,$dat7
+vld1.8 {$in4,$in5,$in6,$in7},[$inp],#64
+aese $dat8,$rndpang
+aesmc $dat8,$dat8
+aese $dat9,$rndpang
+aesmc $dat9,$dat9
+aese $dat10,$rndpang
+aesmc $dat10,$dat10
+aese $dat11,$rndpang
+aesmc $dat11,$dat11
+vld1.8 {$in8,$in9,$in10,$in11},[$inp],#64
+vld1.32 {$rndpang},[$key_],#16
+
+mov $key_, $key
+aese $dat0,$rndping
+aesmc $dat0,$dat0
+aese $dat1,$rndping
+aesmc $dat1,$dat1
+aese $dat2,$rndping
+aesmc $dat2,$dat2
+aese $dat3,$rndping
+aesmc $dat3,$dat3
+aese $dat4,$rndping
+aesmc $dat4,$dat4
+aese $dat5,$rndping
+aesmc $dat5,$dat5
+aese $dat6,$rndping
+aesmc $dat6,$dat6
+aese $dat7,$rndping
+aesmc $dat7,$dat7
+aese $dat8,$rndping
+aesmc $dat8,$dat8
+aese $dat9,$rndping
+aesmc $dat9,$dat9
+aese $dat10,$rndping
+aesmc $dat10,$dat10
+aese $dat11,$rndping
+aesmc $dat11,$dat11
+vld1.32 {$rndping},[$key_],#16 // re-pre-load rndkey[0]
+
+aese $dat0,$rndpang
+eor3 $in0,$in0,$rndlast,$dat0
+vorr $dat0,$ivec,$ivec
+aese $dat1,$rndpang
+eor3 $in1,$in1,$rndlast,$dat1
+vorr $dat1,$ivec,$ivec
+aese $dat2,$rndpang
+eor3 $in2,$in2,$rndlast,$dat2
+vorr $dat2,$ivec,$ivec
+aese $dat3,$rndpang
+eor3 $in3,$in3,$rndlast,$dat3
+vorr $dat3,$ivec,$ivec
+aese $dat4,$rndpang
+eor3 $in4,$in4,$rndlast,$dat4
+vorr $dat4,$ivec,$ivec
+aese $dat5,$rndpang
+eor3 $in5,$in5,$rndlast,$dat5
+vorr $dat5,$ivec,$ivec
+aese $dat6,$rndpang
+eor3 $in6,$in6,$rndlast,$dat6
+vorr $dat6,$ivec,$ivec
+aese $dat7,$rndpang
+eor3 $in7,$in7,$rndlast,$dat7
+vorr $dat7,$ivec,$ivec
+aese $dat8,$rndpang
+eor3 $in8,$in8,$rndlast,$dat8
+vorr $dat8,$ivec,$ivec
+aese $dat9,$rndpang
+eor3 $in9,$in9,$rndlast,$dat9
+vorr $dat9,$ivec,$ivec
+aese $dat10,$rndpang
+eor3 $in10,$in10,$rndlast,$dat10
+vorr $dat10,$ivec,$ivec
+aese $dat11,$rndpang
+eor3 $in11,$in11,$rndlast,$dat11
+vorr $dat11,$ivec,$ivec
+vld1.32 {$rndpang},[$key_],#16 // re-pre-load rndkey[1]
+
+vmov.32 ${dat0}[3],$tctr0
+vmov.32 ${dat1}[3],$tctr1
+vmov.32 ${dat2}[3],$tctr2
+vmov.32 ${dat3}[3],$tctr3
+vst1.8 {$in0,$in1,$in2,$in3},[$out],#64
+vmov.32 ${dat4}[3],$tctr4
+vmov.32 ${dat5}[3],$tctr5
+vmov.32 ${dat6}[3],$tctr6
+vmov.32 ${dat7}[3],$tctr7
+vst1.8 {$in4,$in5,$in6,$in7},[$out],#64
+vmov.32 ${dat8}[3],$tctr8
+vmov.32 ${dat9}[3],$tctr9
+vmov.32 ${dat10}[3],$tctr10
+vmov.32 ${dat11}[3],$tctr11
+vst1.8 {$in8,$in9,$in10,$in11},[$out],#64
+
+mov $cnt,$rounds
+
+add $ctr,$ctr,#12
+subs $len,$len,#12
+b.hs .Loop12x_ctr32_unroll
+
+// pop regs from stack when 12 data chunks are interleaved
+ldp $dat10d,$dat11d,[sp],#32
+ldp $dat8d,$dat9d,[sp],#32
+ldp x23,x24,[sp],#16
+ldp x21,x22,[sp],#16
+ldp x19,x20,[sp],#16
+
+add $len,$len,#12
+cbz $len,.Lctr32_done_unroll
+sub $ctr,$ctr,#12
+
+cmp $len,#2
+b.ls .Lctr32_tail_unroll
+
+cmp $len,#6
+sub $len,$len,#3 // bias
+add $ctr,$ctr,#3
+b.lo .Loop3x_ctr32_unroll
+
+sub $len,$len,#3
+add $ctr,$ctr,#3
+b.lo .Loop6x_ctr32_unroll
+
+.align 4
+.Loop6x_ctr32_unroll:
+aese $dat0,$rndping
+aesmc $dat0,$dat0
+aese $dat1,$rndping
+aesmc $dat1,$dat1
+aese $dat2,$rndping
+aesmc $dat2,$dat2
+aese $dat3,$rndping
+aesmc $dat3,$dat3
+aese $dat4,$rndping
+aesmc $dat4,$dat4
+aese $dat5,$rndping
+aesmc $dat5,$dat5
+vld1.32 {$rndping},[$key_],#16
+subs $cnt,$cnt,#2
+aese $dat0,$rndpang
+aesmc $dat0,$dat0
+aese $dat1,$rndpang
+aesmc $dat1,$dat1
+aese $dat2,$rndpang
+aesmc $dat2,$dat2
+aese $dat3,$rndpang
+aesmc $dat3,$dat3
+aese $dat4,$rndpang
+aesmc $dat4,$dat4
+aese $dat5,$rndpang
+aesmc $dat5,$dat5
+vld1.32 {$rndpang},[$key_],#16
+b.gt .Loop6x_ctr32_unroll
+
+aese $dat0,$rndping
+aesmc $dat0,$dat0
+aese $dat1,$rndping
+aesmc $dat1,$dat1
+aese $dat2,$rndping
+aesmc $dat2,$dat2
+aese $dat3,$rndping
+aesmc $dat3,$dat3
+aese $dat4,$rndping
+aesmc $dat4,$dat4
+aese $dat5,$rndping
+aesmc $dat5,$dat5
+vld1.32 {$rndping},[$key_],#16
+
+aese $dat0,$rndpang
+aesmc $dat0,$dat0
+aese $dat1,$rndpang
+aesmc $dat1,$dat1
+aese $dat2,$rndpang
+aesmc $dat2,$dat2
+aese $dat3,$rndpang
+aesmc $dat3,$dat3
+aese $dat4,$rndpang
+aesmc $dat4,$dat4
+aese $dat5,$rndpang
+aesmc $dat5,$dat5
+vld1.32 {$rndpang},[$key_],#16
+
+aese $dat0,$rndping
+aesmc $dat0,$dat0
+add $tctr0,$ctr,#1
+add $tctr1,$ctr,#2
+aese $dat1,$rndping
+aesmc $dat1,$dat1
+add $tctr2,$ctr,#3
+add $tctr3,$ctr,#4
+aese $dat2,$rndping
+aesmc $dat2,$dat2
+add $tctr4,$ctr,#5
+add $tctr5,$ctr,#6
+rev $tctr0,$tctr0
+aese $dat3,$rndping
+aesmc $dat3,$dat3
+rev $tctr1,$tctr1
+rev $tctr2,$tctr2
+aese $dat4,$rndping
+aesmc $dat4,$dat4
+rev $tctr3,$tctr3
+rev $tctr4,$tctr4
+aese $dat5,$rndping
+aesmc $dat5,$dat5
+rev $tctr5,$tctr5
+vld1.32 {$rndping},[$key_],#16
+
+aese $dat0,$rndpang
+aesmc $dat0,$dat0
+aese $dat1,$rndpang
+aesmc $dat1,$dat1
+vld1.8 {$in0,$in1,$in2,$in3},[$inp],#64
+aese $dat2,$rndpang
+aesmc $dat2,$dat2
+aese $dat3,$rndpang
+aesmc $dat3,$dat3
+vld1.8 {$in4,$in5},[$inp],#32
+aese $dat4,$rndpang
+aesmc $dat4,$dat4
+aese $dat5,$rndpang
+aesmc $dat5,$dat5
+vld1.32 {$rndpang},[$key_],#16
+
+mov $key_, $key
+aese $dat0,$rndping
+aesmc $dat0,$dat0
+aese $dat1,$rndping
+aesmc $dat1,$dat1
+aese $dat2,$rndping
+aesmc $dat2,$dat2
+aese $dat3,$rndping
+aesmc $dat3,$dat3
+aese $dat4,$rndping
+aesmc $dat4,$dat4
+aese $dat5,$rndping
+aesmc $dat5,$dat5
+vld1.32 {$rndping},[$key_],#16 // re-pre-load rndkey[0]
+
+aese $dat0,$rndpang
+eor3 $in0,$in0,$rndlast,$dat0
+aese $dat1,$rndpang
+eor3 $in1,$in1,$rndlast,$dat1
+aese $dat2,$rndpang
+eor3 $in2,$in2,$rndlast,$dat2
+aese $dat3,$rndpang
+eor3 $in3,$in3,$rndlast,$dat3
+aese $dat4,$rndpang
+eor3 $in4,$in4,$rndlast,$dat4
+aese $dat5,$rndpang
+eor3 $in5,$in5,$rndlast,$dat5
+vld1.32 {$rndpang},[$key_],#16 // re-pre-load rndkey[1]
+
+vorr $dat0,$ivec,$ivec
+vorr $dat1,$ivec,$ivec
+vorr $dat2,$ivec,$ivec
+vorr $dat3,$ivec,$ivec
+vorr $dat4,$ivec,$ivec
+vorr $dat5,$ivec,$ivec
+
+vmov.32 ${dat0}[3],$tctr0
+vmov.32 ${dat1}[3],$tctr1
+vst1.8 {$in0,$in1,$in2,$in3},[$out],#64
+vmov.32 ${dat2}[3],$tctr2
+vmov.32 ${dat3}[3],$tctr3
+vst1.8 {$in4,$in5},[$out],#32
+vmov.32 ${dat4}[3],$tctr4
+vmov.32 ${dat5}[3],$tctr5
+
+cbz $len,.Lctr32_done_unroll
+mov $cnt,$rounds
+
+cmp $len,#2
+b.ls .Lctr32_tail_unroll
+
+sub $len,$len,#3 // bias
+add $ctr,$ctr,#3
+b .Loop3x_ctr32_unroll
+
+.align 4
+.Loop3x_ctr32_unroll:
+aese $dat0,$rndping
+aesmc $dat0,$dat0
+aese $dat1,$rndping
+aesmc $dat1,$dat1
+aese $dat2,$rndping
+aesmc $dat2,$dat2
+vld1.32 {$rndping},[$key_],#16
+subs $cnt,$cnt,#2
+aese $dat0,$rndpang
+aesmc $dat0,$dat0
+aese $dat1,$rndpang
+aesmc $dat1,$dat1
+aese $dat2,$rndpang
+aesmc $dat2,$dat2
+vld1.32 {$rndpang},[$key_],#16
+b.gt .Loop3x_ctr32_unroll
+
+aese $dat0,$rndping
+aesmc $tmp0,$dat0
+aese $dat1,$rndping
+aesmc $tmp1,$dat1
+vld1.8 {$in0,$in1,$in2},[$inp],#48
+vorr $dat0,$ivec,$ivec
+aese $dat2,$rndping
+aesmc $dat2,$dat2
+vld1.32 {$rndping},[$key_],#16
+vorr $dat1,$ivec,$ivec
+aese $tmp0,$rndpang
+aesmc $tmp0,$tmp0
+aese $tmp1,$rndpang
+aesmc $tmp1,$tmp1
+aese $dat2,$rndpang
+aesmc $tmp2,$dat2
+vld1.32 {$rndpang},[$key_],#16
+vorr $dat2,$ivec,$ivec
+add $tctr0,$ctr,#1
+aese $tmp0,$rndping
+aesmc $tmp0,$tmp0
+aese $tmp1,$rndping
+aesmc $tmp1,$tmp1
+add $tctr1,$ctr,#2
+aese $tmp2,$rndping
+aesmc $tmp2,$tmp2
+vld1.32 {$rndping},[$key_],#16
+add $ctr,$ctr,#3
+aese $tmp0,$rndpang
+aesmc $tmp0,$tmp0
+aese $tmp1,$rndpang
+aesmc $tmp1,$tmp1
+
+rev $tctr0,$tctr0
+aese $tmp2,$rndpang
+aesmc $tmp2,$tmp2
+vld1.32 {$rndpang},[$key_],#16
+vmov.32 ${dat0}[3], $tctr0
+mov $key_,$key
+rev $tctr1,$tctr1
+aese $tmp0,$rndping
+aesmc $tmp0,$tmp0
+
+aese $tmp1,$rndping
+aesmc $tmp1,$tmp1
+vmov.32 ${dat1}[3], $tctr1
+rev $tctr2,$ctr
+aese $tmp2,$rndping
+aesmc $tmp2,$tmp2
+vmov.32 ${dat2}[3], $tctr2
+
+aese $tmp0,$rndpang
+aese $tmp1,$rndpang
+aese $tmp2,$rndpang
+
+eor3 $in0,$in0,$rndlast,$tmp0
+vld1.32 {$rndping},[$key_],#16 // re-pre-load rndkey[0]
+eor3 $in1,$in1,$rndlast,$tmp1
+mov $cnt,$rounds
+eor3 $in2,$in2,$rndlast,$tmp2
+vld1.32 {$rndpang},[$key_],#16 // re-pre-load rndkey[1]
+vst1.8 {$in0,$in1,$in2},[$out],#48
+
+cbz $len,.Lctr32_done_unroll
+
+.Lctr32_tail_unroll:
+cmp $len,#1
+b.eq .Lctr32_tail_1_unroll
+
+.Lctr32_tail_2_unroll:
+aese $dat0,$rndping
+aesmc $dat0,$dat0
+aese $dat1,$rndping
+aesmc $dat1,$dat1
+vld1.32 {$rndping},[$key_],#16
+subs $cnt,$cnt,#2
+aese $dat0,$rndpang
+aesmc $dat0,$dat0
+aese $dat1,$rndpang
+aesmc $dat1,$dat1
+vld1.32 {$rndpang},[$key_],#16
+b.gt .Lctr32_tail_2_unroll
+
+aese $dat0,$rndping
+aesmc $dat0,$dat0
+aese $dat1,$rndping
+aesmc $dat1,$dat1
+vld1.32 {$rndping},[$key_],#16
+aese $dat0,$rndpang
+aesmc $dat0,$dat0
+aese $dat1,$rndpang
+aesmc $dat1,$dat1
+vld1.32 {$rndpang},[$key_],#16
+vld1.8 {$in0,$in1},[$inp],#32
+aese $dat0,$rndping
+aesmc $dat0,$dat0
+aese $dat1,$rndping
+aesmc $dat1,$dat1
+vld1.32 {$rndping},[$key_],#16
+aese $dat0,$rndpang
+aesmc $dat0,$dat0
+aese $dat1,$rndpang
+aesmc $dat1,$dat1
+vld1.32 {$rndpang},[$key_],#16
+aese $dat0,$rndping
+aesmc $dat0,$dat0
+aese $dat1,$rndping
+aesmc $dat1,$dat1
+aese $dat0,$rndpang
+aese $dat1,$rndpang
+
+eor3 $in0,$in0,$rndlast,$dat0
+eor3 $in1,$in1,$rndlast,$dat1
+vst1.8 {$in0,$in1},[$out],#32
+b .Lctr32_done_unroll
+
+.Lctr32_tail_1_unroll:
+aese $dat0,$rndping
+aesmc $dat0,$dat0
+vld1.32 {$rndping},[$key_],#16
+subs $cnt,$cnt,#2
+aese $dat0,$rndpang
+aesmc $dat0,$dat0
+vld1.32 {$rndpang},[$key_],#16
+b.gt .Lctr32_tail_1_unroll
+
+aese $dat0,$rndping
+aesmc $dat0,$dat0
+vld1.32 {$rndping},[$key_],#16
+aese $dat0,$rndpang
+aesmc $dat0,$dat0
+vld1.32 {$rndpang},[$key_],#16
+vld1.8 {$in0},[$inp]
+aese $dat0,$rndping
+aesmc $dat0,$dat0
+vld1.32 {$rndping},[$key_],#16
+aese $dat0,$rndpang
+aesmc $dat0,$dat0
+vld1.32 {$rndpang},[$key_],#16
+aese $dat0,$rndping
+aesmc $dat0,$dat0
+aese $dat0,$rndpang
+
+eor3 $in0,$in0,$rndlast,$dat0
+vst1.8 {$in0},[$out],#16
+
+.Lctr32_done_unroll:
+ldr x29,[sp],#16
+ret
+.size ${prefix}_ctr32_encrypt_blocks_unroll12_eor3,.-${prefix}_ctr32_encrypt_blocks_unroll12_eor3
+___
+}}}
 {{{
 my ($inp,$out,$len,$key,$ivp)=map("x$_",(0..4));
 my ($rounds,$cnt,$key_)=("w5","w6","x7");
@@ -3640,7 +4381,8 @@ ___
 if ($flavour =~ /64/) { ######## 64-bit code
 my %opcode = (
 "aesd" => 0x4e285800, "aese" => 0x4e284800,
-"aesimc"=> 0x4e287800, "aesmc" => 0x4e286800 );
+"aesimc"=> 0x4e287800, "aesmc" => 0x4e286800,
+"eor3" => 0xce000000, );
 
 local *unaes = sub {
 my ($mnemonic,$arg)=@_;
@@ -3651,10 +4393,21 @@ if ($flavour =~ /64/) { ######## 64-bit code
 $mnemonic,$arg;
 };
 
+sub unsha3 {
+my ($mnemonic,$arg)=@_;
+
+$arg =~ m/[qv]([0-9]+)[^,]*,\s*[qv]([0-9]+)[^,]*(?:,\s*[qv]([0-9]+)[^,]*(?:,\s*[qv#]([0-9\-]+))?)?/
+&&
+sprintf ".inst\t0x%08x\t//%s %s",
+$opcode{$mnemonic}|$1|($2<<5)|($3<<16)|(eval($4)<<10),
+$mnemonic,$arg;
+}
+
 foreach(split("\n",$code)) {
 s/\`([^\`]*)\`/eval($1)/geo;
 
-s/\bq([0-9]+)\b/"v".($1<8?$1:$1+8).".16b"/geo; # old->new registers
+s/\bq([0-9]+)\b/"v".($1<8?$1:($1<24?$1+8:$1-16)).".16b"/geo; # old->new registers
+s/\bq_([0-9]+)\b/"q".$1/geo; # old->new registers
 s/@\s/\/\//o; # old->new style commentary
 
 #s/[v]?(aes\w+)\s+([qv].*)/unaes($1,$2)/geo or
@@ -3667,6 +4420,7 @@ if ($flavour =~ /64/) { ######## 64-bit code
 s/vshr/ushr/o or
 s/^(\s+)v/$1/o or # strip off v prefix
 s/\bbx\s+lr\b/ret/o;
+s/\b(eor3)\s+(v.*)/unsha3($1,$2)/ge;
 
 # fix up remaining legacy suffixes
 s/\.[ui]?8//o;
@@ -88,6 +88,7 @@ extern unsigned int OPENSSL_armv8_rsa_neonized;
 # define ARMV8_SVE (1<<13)
 # define ARMV8_SVE2 (1<<14)
 # define ARMV8_HAVE_SHA3_AND_WORTH_USING (1<<15)
+# define ARMV8_UNROLL12_EOR3 (1<<16)
 
 /*
  * MIDR_EL1 system register
@@ -421,6 +421,10 @@ void OPENSSL_cpuid_setup(void)
 MIDR_IS_CPU_MODEL(OPENSSL_arm_midr, ARM_CPU_IMP_ARM, ARM_CPU_PART_V2)) &&
 (OPENSSL_armcap_P & ARMV8_SHA3))
 OPENSSL_armcap_P |= ARMV8_UNROLL8_EOR3;
+if ((MIDR_IS_CPU_MODEL(OPENSSL_arm_midr, ARM_CPU_IMP_ARM, ARM_CPU_PART_V1) ||
+MIDR_IS_CPU_MODEL(OPENSSL_arm_midr, ARM_CPU_IMP_ARM, ARM_CPU_PART_V2)) &&
+(OPENSSL_armcap_P & ARMV8_SHA3))
+OPENSSL_armcap_P |= ARMV8_UNROLL12_EOR3;
 if ((MIDR_IS_CPU_MODEL(OPENSSL_arm_midr, ARM_CPU_IMP_APPLE, APPLE_CPU_PART_M1_FIRESTORM) ||
 MIDR_IS_CPU_MODEL(OPENSSL_arm_midr, ARM_CPU_IMP_APPLE, APPLE_CPU_PART_M1_ICESTORM) ||
 MIDR_IS_CPU_MODEL(OPENSSL_arm_midr, ARM_CPU_IMP_APPLE, APPLE_CPU_PART_M1_FIRESTORM_PRO) ||
@@ -107,17 +107,21 @@ void gcm_ghash_p8(u64 Xi[2],const u128 Htable[16],const u8 *inp, size_t len);
 # define HWAES_cbc_encrypt aes_v8_cbc_encrypt
 # define HWAES_ecb_encrypt aes_v8_ecb_encrypt
 # if __ARM_MAX_ARCH__>=8 && (defined(__aarch64__) || defined(_M_ARM64))
+# define ARMv8_HWAES_CAPABLE (OPENSSL_armcap_P & ARMV8_AES)
 # define HWAES_xts_encrypt aes_v8_xts_encrypt
 # define HWAES_xts_decrypt aes_v8_xts_decrypt
 # endif
 # define HWAES_ctr32_encrypt_blocks aes_v8_ctr32_encrypt_blocks
+# define HWAES_ctr32_encrypt_blocks_unroll12_eor3 aes_v8_ctr32_encrypt_blocks_unroll12_eor3
 # define AES_PMULL_CAPABLE ((OPENSSL_armcap_P & ARMV8_PMULL) && (OPENSSL_armcap_P & ARMV8_AES))
+# define AES_UNROLL12_EOR3_CAPABLE (OPENSSL_armcap_P & ARMV8_UNROLL12_EOR3)
 # define AES_GCM_ENC_BYTES 512
 # define AES_GCM_DEC_BYTES 512
 # if __ARM_MAX_ARCH__>=8 && (defined(__aarch64__) || defined(_M_ARM64))
 # define AES_gcm_encrypt armv8_aes_gcm_encrypt
 # define AES_gcm_decrypt armv8_aes_gcm_decrypt
-# define AES_GCM_ASM(gctx) ((gctx)->ctr==aes_v8_ctr32_encrypt_blocks && \
+# define AES_GCM_ASM(gctx) (((gctx)->ctr==aes_v8_ctr32_encrypt_blocks_unroll12_eor3 || \
+(gctx)->ctr==aes_v8_ctr32_encrypt_blocks) && \
 (gctx)->gcm.funcs.ghash==gcm_ghash_v8)
 /* The [unroll8_eor3_]aes_gcm_(enc|dec)_(128|192|256)_kernel() functions
  * take input length in BITS and return number of BYTES processed */
@@ -545,6 +549,11 @@ void HWAES_ecb_encrypt(const unsigned char *in, unsigned char *out,
 void HWAES_ctr32_encrypt_blocks(const unsigned char *in, unsigned char *out,
 size_t len, const void *key,
 const unsigned char ivec[16]);
+# if defined(AES_UNROLL12_EOR3_CAPABLE)
+void HWAES_ctr32_encrypt_blocks_unroll12_eor3(const unsigned char *in, unsigned char *out,
+size_t len, const void *key,
+const unsigned char ivec[16]);
+# endif
 void HWAES_xts_encrypt(const unsigned char *inp, unsigned char *out,
 size_t len, const AES_KEY *key1,
 const AES_KEY *key2, const unsigned char iv[16]);
@@ -1,5 +1,5 @@
 /*
- * Copyright 2019-2022 The OpenSSL Project Authors. All Rights Reserved.
+ * Copyright 2019-2023 The OpenSSL Project Authors. All Rights Reserved.
  *
  * Licensed under the Apache License 2.0 (the "License"). You may not use
  * this file except in compliance with the License. You can obtain a copy
@@ -86,8 +86,13 @@ static int armv8_aes_gcm_initkey(PROV_GCM_CTX *ctx, const unsigned char *key,
 PROV_AES_GCM_CTX *actx = (PROV_AES_GCM_CTX *)ctx;
 AES_KEY *ks = &actx->ks.ks;
 
-GCM_HW_SET_KEY_CTR_FN(ks, aes_v8_set_encrypt_key, aes_v8_encrypt,
-aes_v8_ctr32_encrypt_blocks);
+if (AES_UNROLL12_EOR3_CAPABLE) {
+GCM_HW_SET_KEY_CTR_FN(ks, aes_v8_set_encrypt_key, aes_v8_encrypt,
+aes_v8_ctr32_encrypt_blocks_unroll12_eor3);
+} else {
+GCM_HW_SET_KEY_CTR_FN(ks, aes_v8_set_encrypt_key, aes_v8_encrypt,
+aes_v8_ctr32_encrypt_blocks);
+}
 return 1;
 }
 
@@ -146,6 +146,8 @@ const PROV_CIPHER_HW *ossl_prov_cipher_hw_aes_##mode(size_t keybits) \
 # include "cipher_aes_hw_rv64i.inc"
 #elif defined(__riscv) && __riscv_xlen == 32
 # include "cipher_aes_hw_rv32i.inc"
+#elif defined (ARMv8_HWAES_CAPABLE)
+# include "cipher_aes_hw_armv8.inc"
 #else
 /* The generic case */
 # define PROV_CIPHER_HW_declare(mode)
@@ -0,0 +1,34 @@
+/*
+ * Copyright 2023 The OpenSSL Project Authors. All Rights Reserved.
+ *
+ * Licensed under the Apache License 2.0 (the "License"). You may not use
+ * this file except in compliance with the License. You can obtain a copy
+ * in the file LICENSE in the source distribution or at
+ * https://www.openssl.org/source/license.html
+ */
+
+/*
+ * Crypto extension support for AES modes ecb, cbc, ofb, cfb, ctr.
+ * This file is included by cipher_aes_hw.c
+ */
+
+static int cipher_hw_aes_arm_initkey(PROV_CIPHER_CTX *dat,
+const unsigned char *key,
+size_t keylen)
+{
+int ret = cipher_hw_aes_initkey(dat, key, keylen);
+if (AES_UNROLL12_EOR3_CAPABLE && dat->mode == EVP_CIPH_CTR_MODE)
+dat->stream.ctr = (ctr128_f)HWAES_ctr32_encrypt_blocks_unroll12_eor3;
+
+return ret;
+}
+
+#define PROV_CIPHER_HW_declare(mode) \
+static const PROV_CIPHER_HW aes_arm_##mode = { \
+cipher_hw_aes_arm_initkey, \
+ossl_cipher_hw_generic_##mode, \
+cipher_hw_aes_copyctx \
+};
+#define PROV_CIPHER_HW_select(mode) \
+if (ARMv8_HWAES_CAPABLE) \
+return &aes_arm_##mode;