Don't overread in poly_Rq_mul

The polynomials have 701 16-bit values. But poly_Rq_mul was reading 32
bytes at offset 1384 in order to get the last 18 of them. This silently
worked for a long time, but when commit 7153013019 switched to keeping
variables on the stack it was noticed by Valgrind.

This change fixes the overread. Setting watchpoints at the ends of the
two inputs (and one output) now shows neither overreads nor overwrites.

BUG=424

Change-Id: Id86c1407ffce66593541c10feee47213f4b95c5d
Reviewed-on: https://boringssl-review.googlesource.com/c/boringssl/+/48645
Reviewed-by: David Benjamin <davidben@google.com>
grpc-202302
Adam Langley 3 years ago
parent 5799ebfe5f
commit f1d153dc36
  1. 64
      crypto/hrss/asm/poly_rq_mul.S

@ -26,23 +26,6 @@
# This file was generated by poly_rq_mul.py
.text
.align 32
mask_low9words:
.word 0xffff
.word 0xffff
.word 0xffff
.word 0xffff
.word 0xffff
.word 0xffff
.word 0xffff
.word 0xffff
.word 0xffff
.word 0x0
.word 0x0
.word 0x0
.word 0x0
.word 0x0
.word 0x0
.word 0x0
const3:
.word 3
.word 3
@ -746,8 +729,20 @@ vmovdqu 328(%rsi), %ymm12
vmovdqu 1120(%rsi), %ymm4
vmovdqu 1208(%rsi), %ymm5
vmovdqu 1296(%rsi), %ymm6
vmovdqu 1384(%rsi), %ymm7
vpand mask_low9words(%rip), %ymm7, %ymm7
# Only 18 bytes more can be read, but vmovdqu reads 32.
# Copy 18 bytes to the red zone and zero pad to 32 bytes.
xor %r9, %r9
movq %r9, -16(%rsp)
movq %r9, -8(%rsp)
movq 1384(%rsi), %r9
movq %r9, -32(%rsp)
movq 1384+8(%rsi), %r9
movq %r9, -24(%rsp)
movw 1384+16(%rsi), %r9w
movw %r9w, -16(%rsp)
vmovdqu -32(%rsp), %ymm7
vmovdqu 416(%rsi), %ymm8
vmovdqu 504(%rsi), %ymm9
vmovdqu 592(%rsi), %ymm10
@ -1341,8 +1336,20 @@ vmovdqu 328(%rdx), %ymm12
vmovdqu 1120(%rdx), %ymm4
vmovdqu 1208(%rdx), %ymm5
vmovdqu 1296(%rdx), %ymm6
vmovdqu 1384(%rdx), %ymm7
vpand mask_low9words(%rip), %ymm7, %ymm7
# Only 18 bytes more can be read, but vmovdqu reads 32.
# Copy 18 bytes to the red zone and zero pad to 32 bytes.
xor %r9, %r9
movq %r9, -16(%rsp)
movq %r9, -8(%rsp)
movq 1384(%rdx), %r9
movq %r9, -32(%rsp)
movq 1384+8(%rdx), %r9
movq %r9, -24(%rsp)
movw 1384+16(%rdx), %r9w
movw %r9w, -16(%rsp)
vmovdqu -32(%rsp), %ymm7
vmovdqu 416(%rdx), %ymm8
vmovdqu 504(%rdx), %ymm9
vmovdqu 592(%rdx), %ymm10
@ -8295,7 +8302,20 @@ vpshufb shufmin1_mask3(%rip), %ymm8, %ymm8
vmovdqa %ymm8, 2880(%r8)
vmovdqu 680(%rdi), %ymm8
vmovdqu 1032(%rdi), %ymm10
vmovdqu 1384(%rdi), %ymm2
# Only 18 bytes can be read at 1384, but vmovdqu reads 32.
# Copy 18 bytes to the red zone and zero pad to 32 bytes.
xor %r9, %r9
movq %r9, -16(%rsp)
movq %r9, -8(%rsp)
movq 1384(%rdi), %r9
movq %r9, -32(%rsp)
movq 1384+8(%rdi), %r9
movq %r9, -24(%rsp)
movw 1384+16(%rdi), %r9w
movw %r9w, -16(%rsp)
vmovdqu -32(%rsp), %ymm2
vpaddw %ymm5, %ymm8, %ymm5
vpaddw %ymm6, %ymm10, %ymm6
vpaddw %ymm4, %ymm2, %ymm4

Loading…
Cancel
Save