@@ -217,34 +217,15 @@ K256:
 .word	0x90befffa,0xa4506ceb,0xbef9a3f7,0xc67178f2
 .size	K256,.-K256
 .word	0				@ terminator
-#if __ARM_MAX_ARCH__>=7 && !defined(__KERNEL__)
-.LOPENSSL_armcap:
-.word	OPENSSL_armcap_P-.Lsha256_block_data_order
-#endif
 .align	5
 
-.global	sha256_block_data_order
-.type	sha256_block_data_order,%function
-sha256_block_data_order:
-.Lsha256_block_data_order:
-	adr	r3,.Lsha256_block_data_order
-#if __ARM_MAX_ARCH__>=7 && !defined(__KERNEL__)
-	ldr	r12,.LOPENSSL_armcap
-	ldr	r12,[r3,r12]		@ OPENSSL_armcap_P
-#ifdef __APPLE__
-	ldr	r12,[r12]
-#endif
-	tst	r12,#ARMV8_SHA256
-	bne	.LARMv8
-	tst	r12,#ARMV7_NEON
-	bne	.LNEON
-#endif
+.global	sha256_block_data_order_nohw
+.type	sha256_block_data_order_nohw,%function
+sha256_block_data_order_nohw:
 	add	$len,$inp,$len,lsl#6	@ len to point at the end of inp
 	stmdb	sp!,{$ctx,$inp,$len,r4-r11,lr}
 	ldmia	$ctx,{$A,$B,$C,$D,$E,$F,$G,$H}
-	@ TODO(davidben): When the OPENSSL_armcap logic above is removed,
-	@ replace this with a simple ADR.
-	sub	$Ktbl,r3,#256+32	@ K256
+	adr	$Ktbl,K256
 	sub	sp,sp,#16*4		@ alloca(X[16])
 .Loop:
 # if __ARM_ARCH>=7
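The `adr $Ktbl,K256` added here replaces the old two-step derivation: with the dispatch gone, r3 no longer holds the address of `.Lsha256_block_data_order`, so the table address can no longer be recovered by subtracting `#256+32` (the 256-byte K table plus the terminator word, armcap word, and alignment padding shown at the top of this hunk) and is instead formed directly. For reference, a self-contained sketch of the ADR pattern, with illustrative label names (GNU assembler, Arm mode):

	.syntax	unified
	.arch	armv7-a
	.text
table:
	.word	0x428a2f98, 0x71374491	@ a couple of nearby constants

	.align	5
	.global	load_table
load_table:
	adr	r2, table		@ assembles to SUB r2, pc, #imm, so
					@ r2 = &table with no load and no
					@ relocation (position-independent)
	bx	lr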
@@ -298,7 +279,7 @@ $code.=<<___;
 	moveq	pc,lr			@ be binary compatible with V4, yet
 	bx	lr			@ interoperable with Thumb ISA:-)
 #endif
-.size	sha256_block_data_order,.-sha256_block_data_order
+.size	sha256_block_data_order_nohw,.-sha256_block_data_order_nohw
 ___
 ######################################################################
 # NEON stuff
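Context for the unchanged tail above: ARMv4 has no `bx`, so the code returns with a plain `mov pc,lr` when the caller is Arm code and uses `bx lr` (ARMv4T and later) only when bit 0 of `lr` indicates a Thumb caller. The idiom in isolation, as a sketch (the `tst` sits just above this hunk in the full file; the function name is illustrative):

	.syntax	unified
	.text
	.global	ret_interwork
ret_interwork:
	tst	lr, #1		@ bit 0 of lr set => return to Thumb code
	moveq	pc, lr		@ Arm-mode caller: plain MOV works on ARMv4
	bx	lr		@ Thumb caller: BX switches state (ARMv4T+)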
@@ -483,10 +464,12 @@ $code.=<<___;
 .align	5
 .skip	16
 sha256_block_data_order_neon:
-.LNEON:
 	stmdb	sp!,{r4-r12,lr}
 
 	sub	$H,sp,#16*4+16
+	@ In Arm mode, the following ADR runs up against the limits of encodable
+	@ offsets. It only fits because the offset, when the ADR is placed here,
+	@ is a multiple of 16.
 	adr	$Ktbl,K256
 	bic	$H,$H,#15		@ align for 128-bit stores
 	mov	$t2,sp
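The new comment refers to the Arm-mode ADR encoding: the assembler expands `adr` to `ADD/SUB rd,pc,#imm`, and the immediate must be an 8-bit value rotated right by an even amount. A large offset therefore encodes only if all its significant bits fit an 8-bit window; a multiple of 16 up to 0xFF0 does (0xFF rotated), which is why the placement matters. A sketch demonstrating the limit, with an illustrative padding size:

	.syntax	unified
	.arch	armv7-a
	.text
near:
	.space	0xfe8		@ pad so the ADR below is 0xFF0 bytes past near
	adr	r0, near	@ Arm mode: SUB r0, pc, #0xFF0; 0xFF0 is
				@ 0xFF rotated, so it encodes
	bx	lr

Pad with 0xffc instead and the distance becomes 0x1004, which spans 11 significant bits and has no 8-bit rotated encoding; the assembler rejects the ADR.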
@@ -613,12 +596,26 @@ $code.=<<___;
 # define INST(a,b,c,d)	.byte	a,b,c,d
 # endif
 
-.type	sha256_block_data_order_armv8,%function
+.LK256_shortcut:
+@ PC is 8 bytes ahead in Arm mode and 4 bytes ahead in Thumb mode.
+#if defined(__thumb2__)
+.word	K256-(.LK256_add+4)
+#else
+.word	K256-(.LK256_add+8)
+#endif
+
+.global	sha256_block_data_order_hw
+.type	sha256_block_data_order_hw,%function
 .align	5
-sha256_block_data_order_armv8:
-.LARMv8:
+sha256_block_data_order_hw:
+	@ K256 is too far to reference from one ADR command in Thumb mode. In
+	@ Arm mode, we could make it fit by aligning the ADR offset to a 64-byte
+	@ boundary. For simplicity, just load the offset from .LK256_shortcut.
+	ldr	$Ktbl,.LK256_shortcut
+.LK256_add:
+	add	$Ktbl,pc,$Ktbl
+
 	vld1.32	{$ABCD,$EFGH},[$ctx]
-	sub	$Ktbl,$Ktbl,#256+32
 	add	$len,$inp,$len,lsl#6	@ len to point at the end of inp
 	b	.Loop_v8
 
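The `.LK256_shortcut`/`.LK256_add` pair is the standard escape when a label is out of ADR range: store the PC-relative delta in a literal word and add the runtime `pc`, whose read-ahead (8 bytes in Arm mode, 4 in Thumb) the `+8`/`+4` terms cancel exactly. The same pattern as a self-contained sketch, all names illustrative:

	.syntax	unified
	.arch	armv7-a
	.text
	.global	load_far
load_far:
	ldr	r3, .Loff	@ r3 = far_table - (.Ladd + bias)
.Ladd:
	add	r3, pc, r3	@ pc reads as .Ladd+8 (Arm) or .Ladd+4
				@ (Thumb), cancelling the bias:
				@ r3 = &far_table
	bx	lr

.Loff:
#if defined(__thumb2__)
	.word	far_table - (.Ladd + 4)
#else
	.word	far_table - (.Ladd + 8)
#endif

far_table:
	.word	0		@ stand-in for a real table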
@@ -680,17 +677,13 @@ $code.=<<___;
 	vst1.32	{$ABCD,$EFGH},[$ctx]
 	ret		@ bx lr
-.size	sha256_block_data_order_armv8,.-sha256_block_data_order_armv8
+.size	sha256_block_data_order_hw,.-sha256_block_data_order_hw
 #endif
 ___
 }}}
 
 $code.=<<___;
 .asciz	"SHA256 block transform for ARMv4/NEON/ARMv8, CRYPTOGAMS by <appro\@openssl.org>"
 .align	2
-#if __ARM_MAX_ARCH__>=7 && !defined(__KERNEL__)
-.comm	OPENSSL_armcap_P,4,4
-.hidden	OPENSSL_armcap_P
-#endif
 ___
 
 open SELF,$0;