|
|
@ -19,7 +19,8 @@ |
|
|
|
*/ |
|
|
|
*/ |
|
|
|
|
|
|
|
|
|
|
|
#include "libavutil/aarch64/asm.S" |
|
|
|
#include "libavutil/aarch64/asm.S" |
|
|
|
#define MAX_PB_SIZE 64 |
|
|
|
#define HEVC_MAX_PB_SIZE 64 |
|
|
|
|
|
|
|
#define VVC_MAX_PB_SIZE 128 |
|
|
|
|
|
|
|
|
|
|
|
const epel_filters, align=4 |
|
|
|
const epel_filters, align=4 |
|
|
|
.byte 0, 0, 0, 0 |
|
|
|
.byte 0, 0, 0, 0 |
|
|
@ -131,8 +132,13 @@ endconst |
|
|
|
b.ne 1b |
|
|
|
b.ne 1b |
|
|
|
.endm |
|
|
|
.endm |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
// VVC entry point for the 4-pixel-wide, 8-bit "put pel pixels" routine.
// It contains no loop of its own: it only loads a stride constant into x7
// and then branches into the code that follows this function.
function ff_vvc_put_pel_pixels4_8_neon, export=1 |
|
|
|
|
|
|
|
// x7 = VVC_MAX_PB_SIZE * 2 (VVC_MAX_PB_SIZE is #defined as 128 in this file).
// NOTE(review): presumably the per-row destination advance in bytes used by
// the shared loop's store; that store is not visible in this view - confirm.
mov x7, #(VVC_MAX_PB_SIZE * 2) |
|
|
|
|
|
|
|
// "1f" is the nearest following local label 1. In this view that is the row
// loop of ff_hevc_put_hevc_pel_pixels4_8_neon, placed right after that
// function's own "mov x7", so the HEVC stride setup is skipped.
b 1f |
|
|
|
|
|
|
|
endfunc |
|
|
|
|
|
|
|
|
|
|
|
function ff_hevc_put_hevc_pel_pixels4_8_neon, export=1 |
|
|
|
function ff_hevc_put_hevc_pel_pixels4_8_neon, export=1 |
|
|
|
mov x7, #(MAX_PB_SIZE * 2) |
|
|
|
mov x7, #(HEVC_MAX_PB_SIZE * 2) |
|
|
|
1: ld1 {v0.s}[0], [x1], x2 |
|
|
|
1: ld1 {v0.s}[0], [x1], x2 |
|
|
|
ushll v4.8h, v0.8b, #6 |
|
|
|
ushll v4.8h, v0.8b, #6 |
|
|
|
subs w3, w3, #1 |
|
|
|
subs w3, w3, #1 |
|
|
@ -142,7 +148,7 @@ function ff_hevc_put_hevc_pel_pixels4_8_neon, export=1 |
|
|
|
endfunc |
|
|
|
endfunc |
|
|
|
|
|
|
|
|
|
|
|
function ff_hevc_put_hevc_pel_pixels6_8_neon, export=1 |
|
|
|
function ff_hevc_put_hevc_pel_pixels6_8_neon, export=1 |
|
|
|
mov x7, #(MAX_PB_SIZE * 2 - 8) |
|
|
|
mov x7, #(HEVC_MAX_PB_SIZE * 2 - 8) |
|
|
|
1: ld1 {v0.8b}, [x1], x2 |
|
|
|
1: ld1 {v0.8b}, [x1], x2 |
|
|
|
ushll v4.8h, v0.8b, #6 |
|
|
|
ushll v4.8h, v0.8b, #6 |
|
|
|
st1 {v4.d}[0], [x0], #8 |
|
|
|
st1 {v4.d}[0], [x0], #8 |
|
|
@ -152,8 +158,13 @@ function ff_hevc_put_hevc_pel_pixels6_8_neon, export=1 |
|
|
|
ret |
|
|
|
ret |
|
|
|
endfunc |
|
|
|
endfunc |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
// VVC entry point for the 8-pixel-wide, 8-bit "put pel pixels" routine.
// It contains no loop of its own: it only loads a stride constant into x7
// and then branches into the code that follows this function.
function ff_vvc_put_pel_pixels8_8_neon, export=1 |
|
|
|
|
|
|
|
// x7 = VVC_MAX_PB_SIZE * 2 (VVC_MAX_PB_SIZE is #defined as 128 in this file).
// NOTE(review): presumably the per-row destination advance in bytes used by
// the shared loop's store; that store is not visible in this view - confirm.
mov x7, #(VVC_MAX_PB_SIZE * 2) |
|
|
|
|
|
|
|
// "1f" is the nearest following local label 1. In this view that is the row
// loop of ff_hevc_put_hevc_pel_pixels8_8_neon, placed right after that
// function's own "mov x7", so the HEVC stride setup is skipped.
b 1f |
|
|
|
|
|
|
|
endfunc |
|
|
|
|
|
|
|
|
|
|
|
function ff_hevc_put_hevc_pel_pixels8_8_neon, export=1 |
|
|
|
function ff_hevc_put_hevc_pel_pixels8_8_neon, export=1 |
|
|
|
mov x7, #(MAX_PB_SIZE * 2) |
|
|
|
mov x7, #(HEVC_MAX_PB_SIZE * 2) |
|
|
|
1: ld1 {v0.8b}, [x1], x2 |
|
|
|
1: ld1 {v0.8b}, [x1], x2 |
|
|
|
ushll v4.8h, v0.8b, #6 |
|
|
|
ushll v4.8h, v0.8b, #6 |
|
|
|
subs w3, w3, #1 |
|
|
|
subs w3, w3, #1 |
|
|
@ -163,7 +174,7 @@ function ff_hevc_put_hevc_pel_pixels8_8_neon, export=1 |
|
|
|
endfunc |
|
|
|
endfunc |
|
|
|
|
|
|
|
|
|
|
|
function ff_hevc_put_hevc_pel_pixels12_8_neon, export=1 |
|
|
|
function ff_hevc_put_hevc_pel_pixels12_8_neon, export=1 |
|
|
|
mov x7, #(MAX_PB_SIZE * 2 - 16) |
|
|
|
mov x7, #(HEVC_MAX_PB_SIZE * 2 - 16) |
|
|
|
1: ld1 {v0.8b, v1.8b}, [x1], x2 |
|
|
|
1: ld1 {v0.8b, v1.8b}, [x1], x2 |
|
|
|
ushll v4.8h, v0.8b, #6 |
|
|
|
ushll v4.8h, v0.8b, #6 |
|
|
|
st1 {v4.8h}, [x0], #16 |
|
|
|
st1 {v4.8h}, [x0], #16 |
|
|
@ -174,8 +185,13 @@ function ff_hevc_put_hevc_pel_pixels12_8_neon, export=1 |
|
|
|
ret |
|
|
|
ret |
|
|
|
endfunc |
|
|
|
endfunc |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
// VVC entry point for the 16-pixel-wide, 8-bit "put pel pixels" routine.
// It contains no loop of its own: it only loads a stride constant into x7
// and then branches into the code that follows this function.
function ff_vvc_put_pel_pixels16_8_neon, export=1 |
|
|
|
|
|
|
|
// x7 = VVC_MAX_PB_SIZE * 2 (VVC_MAX_PB_SIZE is #defined as 128 in this file).
// NOTE(review): presumably the per-row destination advance in bytes used by
// the shared loop's store; that store is not visible in this view - confirm.
mov x7, #(VVC_MAX_PB_SIZE * 2) |
|
|
|
|
|
|
|
// "1f" is the nearest following local label 1. In this view that is the row
// loop of ff_hevc_put_hevc_pel_pixels16_8_neon, placed right after that
// function's own "mov x7", so the HEVC stride setup is skipped.
b 1f |
|
|
|
|
|
|
|
endfunc |
|
|
|
|
|
|
|
|
|
|
|
function ff_hevc_put_hevc_pel_pixels16_8_neon, export=1 |
|
|
|
function ff_hevc_put_hevc_pel_pixels16_8_neon, export=1 |
|
|
|
mov x7, #(MAX_PB_SIZE * 2) |
|
|
|
mov x7, #(HEVC_MAX_PB_SIZE * 2) |
|
|
|
1: ld1 {v0.8b, v1.8b}, [x1], x2 |
|
|
|
1: ld1 {v0.8b, v1.8b}, [x1], x2 |
|
|
|
ushll v4.8h, v0.8b, #6 |
|
|
|
ushll v4.8h, v0.8b, #6 |
|
|
|
ushll v5.8h, v1.8b, #6 |
|
|
|
ushll v5.8h, v1.8b, #6 |
|
|
@ -186,7 +202,7 @@ function ff_hevc_put_hevc_pel_pixels16_8_neon, export=1 |
|
|
|
endfunc |
|
|
|
endfunc |
|
|
|
|
|
|
|
|
|
|
|
function ff_hevc_put_hevc_pel_pixels24_8_neon, export=1 |
|
|
|
function ff_hevc_put_hevc_pel_pixels24_8_neon, export=1 |
|
|
|
mov x7, #(MAX_PB_SIZE * 2) |
|
|
|
mov x7, #(HEVC_MAX_PB_SIZE * 2) |
|
|
|
1: ld1 {v0.8b-v2.8b}, [x1], x2 |
|
|
|
1: ld1 {v0.8b-v2.8b}, [x1], x2 |
|
|
|
ushll v4.8h, v0.8b, #6 |
|
|
|
ushll v4.8h, v0.8b, #6 |
|
|
|
ushll v5.8h, v1.8b, #6 |
|
|
|
ushll v5.8h, v1.8b, #6 |
|
|
@ -197,8 +213,13 @@ function ff_hevc_put_hevc_pel_pixels24_8_neon, export=1 |
|
|
|
ret |
|
|
|
ret |
|
|
|
endfunc |
|
|
|
endfunc |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
// VVC entry point for the 32-pixel-wide, 8-bit "put pel pixels" routine.
// It contains no loop of its own: it only loads a stride constant into x7
// and then branches into the code that follows this function.
function ff_vvc_put_pel_pixels32_8_neon, export=1 |
|
|
|
|
|
|
|
// x7 = VVC_MAX_PB_SIZE * 2 (VVC_MAX_PB_SIZE is #defined as 128 in this file).
// NOTE(review): presumably the per-row destination advance in bytes used by
// the shared loop's store; that store is not visible in this view - confirm.
mov x7, #(VVC_MAX_PB_SIZE * 2) |
|
|
|
|
|
|
|
// "1f" is the nearest following local label 1. In this view that is the row
// loop of ff_hevc_put_hevc_pel_pixels32_8_neon, placed right after that
// function's own "mov x7", so the HEVC stride setup is skipped.
b 1f |
|
|
|
|
|
|
|
endfunc |
|
|
|
|
|
|
|
|
|
|
|
function ff_hevc_put_hevc_pel_pixels32_8_neon, export=1 |
|
|
|
function ff_hevc_put_hevc_pel_pixels32_8_neon, export=1 |
|
|
|
mov x7, #(MAX_PB_SIZE * 2) |
|
|
|
mov x7, #(HEVC_MAX_PB_SIZE * 2) |
|
|
|
1: ld1 {v0.8b-v3.8b}, [x1], x2 |
|
|
|
1: ld1 {v0.8b-v3.8b}, [x1], x2 |
|
|
|
ushll v4.8h, v0.8b, #6 |
|
|
|
ushll v4.8h, v0.8b, #6 |
|
|
|
ushll v5.8h, v1.8b, #6 |
|
|
|
ushll v5.8h, v1.8b, #6 |
|
|
@ -211,7 +232,7 @@ function ff_hevc_put_hevc_pel_pixels32_8_neon, export=1 |
|
|
|
endfunc |
|
|
|
endfunc |
|
|
|
|
|
|
|
|
|
|
|
function ff_hevc_put_hevc_pel_pixels48_8_neon, export=1 |
|
|
|
function ff_hevc_put_hevc_pel_pixels48_8_neon, export=1 |
|
|
|
mov x7, #(MAX_PB_SIZE) |
|
|
|
mov x7, #(HEVC_MAX_PB_SIZE) |
|
|
|
1: ld1 {v0.16b-v2.16b}, [x1], x2 |
|
|
|
1: ld1 {v0.16b-v2.16b}, [x1], x2 |
|
|
|
ushll v4.8h, v0.8b, #6 |
|
|
|
ushll v4.8h, v0.8b, #6 |
|
|
|
ushll2 v5.8h, v0.16b, #6 |
|
|
|
ushll2 v5.8h, v0.16b, #6 |
|
|
@ -226,26 +247,50 @@ function ff_hevc_put_hevc_pel_pixels48_8_neon, export=1 |
|
|
|
ret |
|
|
|
ret |
|
|
|
endfunc |
|
|
|
endfunc |
|
|
|
|
|
|
|
|
|
|
|
function ff_hevc_put_hevc_pel_pixels64_8_neon, export=1 |
|
|
|
.macro put_pel_pixels64_8_neon
|
|
|
|
1: ld1 {v0.16b-v3.16b}, [x1], x2 |
|
|
|
|
|
|
|
ushll v4.8h, v0.8b, #6 |
|
|
|
ushll v4.8h, v0.8b, #6 |
|
|
|
ushll2 v5.8h, v0.16b, #6 |
|
|
|
ushll2 v5.8h, v0.16b, #6 |
|
|
|
ushll v6.8h, v1.8b, #6 |
|
|
|
ushll v6.8h, v1.8b, #6 |
|
|
|
ushll2 v7.8h, v1.16b, #6 |
|
|
|
ushll2 v7.8h, v1.16b, #6 |
|
|
|
st1 {v4.8h-v7.8h}, [x0], #(MAX_PB_SIZE) |
|
|
|
st1 {v4.8h-v7.8h}, [x0], #64 |
|
|
|
ushll v16.8h, v2.8b, #6 |
|
|
|
ushll v16.8h, v2.8b, #6 |
|
|
|
ushll2 v17.8h, v2.16b, #6 |
|
|
|
ushll2 v17.8h, v2.16b, #6 |
|
|
|
ushll v18.8h, v3.8b, #6 |
|
|
|
ushll v18.8h, v3.8b, #6 |
|
|
|
ushll2 v19.8h, v3.16b, #6 |
|
|
|
ushll2 v19.8h, v3.16b, #6 |
|
|
|
|
|
|
|
st1 {v16.8h-v19.8h}, [x0], x7 |
|
|
|
|
|
|
|
.endm |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
// VVC entry point for the 64-pixel-wide, 8-bit "put pel pixels" routine.
// It only sets x7 and then branches into the loop of the function that
// follows it.
function ff_vvc_put_pel_pixels64_8_neon, export=1 |
|
|
|
|
|
|
|
// x7 = 2 * VVC_MAX_PB_SIZE - 64. The put_pel_pixels64_8_neon macro stores
// the first four vectors with a fixed #64 post-increment and the last four
// with post-increment x7, so one macro expansion advances x0 by
// 2 * VVC_MAX_PB_SIZE bytes in total.
mov x7, #(2 * VVC_MAX_PB_SIZE - 64) |
|
|
|
|
|
|
|
// "1f" is the nearest following local label 1. In this view that is the
// loop head of ff_hevc_put_hevc_pel_pixels64_8_neon, which sits after that
// function's own "mov x7, #(HEVC_MAX_PB_SIZE)".
b 1f |
|
|
|
|
|
|
|
endfunc |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
function ff_hevc_put_hevc_pel_pixels64_8_neon, export=1 |
|
|
|
|
|
|
|
mov x7, #(HEVC_MAX_PB_SIZE) |
|
|
|
|
|
|
|
1: |
|
|
|
|
|
|
|
ld1 {v0.16b-v3.16b}, [x1], x2 |
|
|
|
subs w3, w3, #1 |
|
|
|
subs w3, w3, #1 |
|
|
|
st1 {v16.8h-v19.8h}, [x0], #(MAX_PB_SIZE) |
|
|
|
put_pel_pixels64_8_neon |
|
|
|
b.ne 1b |
|
|
|
b.ne 1b |
|
|
|
ret |
|
|
|
ret |
|
|
|
endfunc |
|
|
|
endfunc |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
// VVC 128-pixel-wide, 8-bit "put pel pixels" routine.
// Register use as visible in this body and in put_pel_pixels64_8_neon:
//   x0 - store pointer, advanced by the macro's post-indexed stores
//   x1 - load pointer for the current row, advanced by x2 once per iteration
//   x2 - byte offset added to x1 between rows
//   w3 - iteration counter, decremented once per row
//   x6 - scratch copy of x1 used to walk the two 64-byte halves of a row
//   x7 - post-increment used by the macro's second store
// NOTE(review): x0/x1/x2/w3 are presumably dst/src/src stride/height
// arguments - confirm against the C prototype.
function ff_vvc_put_pel_pixels128_8_neon, export=1 |
|
|
|
|
|
|
|
// Each macro expansion advances x0 by 64 (fixed) + x7 = 128 bytes, so the two
// expansions per row advance it by 256 bytes = 2 * VVC_MAX_PB_SIZE.
mov x7, #64 |
|
|
|
|
|
|
|
1: |
|
|
|
|
|
|
|
// Walk the row through x6 so that x1 can be stepped to the next row
// independently of the two post-indexed loads below.
mov x6, x1 |
|
|
|
|
|
|
|
// First 64 source bytes of the row into v0-v3.
ld1 {v0.16b-v3.16b}, [x6], #64 |
|
|
|
|
|
|
|
add x1, x1, x2 |
|
|
|
|
|
|
|
// Flags set here are consumed by the b.ne at the end of the loop; the loads
// and the ushll/st1 instructions of the macro in between do not write the
// condition flags.
subs w3, w3, #1 |
|
|
|
|
|
|
|
// Widen v0-v3 to 16 bit with a left shift by 6 and store them (first half).
put_pel_pixels64_8_neon |
|
|
|
|
|
|
|
// Second 64 source bytes of the same row.
ld1 {v0.16b-v3.16b}, [x6], #64 |
|
|
|
|
|
|
|
put_pel_pixels64_8_neon |
|
|
|
|
|
|
|
b.ne 1b |
|
|
|
|
|
|
|
ret |
|
|
|
|
|
|
|
endfunc |
|
|
|
|
|
|
|
|
|
|
|
function ff_hevc_put_hevc_pel_bi_pixels4_8_neon, export=1 |
|
|
|
function ff_hevc_put_hevc_pel_bi_pixels4_8_neon, export=1 |
|
|
|
mov x10, #(MAX_PB_SIZE * 2) |
|
|
|
mov x10, #(HEVC_MAX_PB_SIZE * 2) |
|
|
|
1: ld1 {v0.s}[0], [x2], x3 // src |
|
|
|
1: ld1 {v0.s}[0], [x2], x3 // src |
|
|
|
ushll v16.8h, v0.8b, #6 |
|
|
|
ushll v16.8h, v0.8b, #6 |
|
|
|
ld1 {v20.4h}, [x4], x10 // src2 |
|
|
|
ld1 {v20.4h}, [x4], x10 // src2 |
|
|
@ -258,7 +303,7 @@ function ff_hevc_put_hevc_pel_bi_pixels4_8_neon, export=1 |
|
|
|
endfunc |
|
|
|
endfunc |
|
|
|
|
|
|
|
|
|
|
|
function ff_hevc_put_hevc_pel_bi_pixels6_8_neon, export=1 |
|
|
|
function ff_hevc_put_hevc_pel_bi_pixels6_8_neon, export=1 |
|
|
|
mov x10, #(MAX_PB_SIZE * 2) |
|
|
|
mov x10, #(HEVC_MAX_PB_SIZE * 2) |
|
|
|
sub x1, x1, #4 |
|
|
|
sub x1, x1, #4 |
|
|
|
1: ld1 {v0.8b}, [x2], x3 |
|
|
|
1: ld1 {v0.8b}, [x2], x3 |
|
|
|
ushll v16.8h, v0.8b, #6 |
|
|
|
ushll v16.8h, v0.8b, #6 |
|
|
@ -273,7 +318,7 @@ function ff_hevc_put_hevc_pel_bi_pixels6_8_neon, export=1 |
|
|
|
endfunc |
|
|
|
endfunc |
|
|
|
|
|
|
|
|
|
|
|
function ff_hevc_put_hevc_pel_bi_pixels8_8_neon, export=1 |
|
|
|
function ff_hevc_put_hevc_pel_bi_pixels8_8_neon, export=1 |
|
|
|
mov x10, #(MAX_PB_SIZE * 2) |
|
|
|
mov x10, #(HEVC_MAX_PB_SIZE * 2) |
|
|
|
1: ld1 {v0.8b}, [x2], x3 // src |
|
|
|
1: ld1 {v0.8b}, [x2], x3 // src |
|
|
|
ushll v16.8h, v0.8b, #6 |
|
|
|
ushll v16.8h, v0.8b, #6 |
|
|
|
ld1 {v20.8h}, [x4], x10 // src2 |
|
|
|
ld1 {v20.8h}, [x4], x10 // src2 |
|
|
@ -286,7 +331,7 @@ function ff_hevc_put_hevc_pel_bi_pixels8_8_neon, export=1 |
|
|
|
endfunc |
|
|
|
endfunc |
|
|
|
|
|
|
|
|
|
|
|
function ff_hevc_put_hevc_pel_bi_pixels12_8_neon, export=1 |
|
|
|
function ff_hevc_put_hevc_pel_bi_pixels12_8_neon, export=1 |
|
|
|
mov x10, #(MAX_PB_SIZE * 2) |
|
|
|
mov x10, #(HEVC_MAX_PB_SIZE * 2) |
|
|
|
sub x1, x1, #8 |
|
|
|
sub x1, x1, #8 |
|
|
|
1: ld1 {v0.16b}, [x2], x3 |
|
|
|
1: ld1 {v0.16b}, [x2], x3 |
|
|
|
ushll v16.8h, v0.8b, #6 |
|
|
|
ushll v16.8h, v0.8b, #6 |
|
|
@ -304,7 +349,7 @@ function ff_hevc_put_hevc_pel_bi_pixels12_8_neon, export=1 |
|
|
|
endfunc |
|
|
|
endfunc |
|
|
|
|
|
|
|
|
|
|
|
function ff_hevc_put_hevc_pel_bi_pixels16_8_neon, export=1 |
|
|
|
function ff_hevc_put_hevc_pel_bi_pixels16_8_neon, export=1 |
|
|
|
mov x10, #(MAX_PB_SIZE * 2) |
|
|
|
mov x10, #(HEVC_MAX_PB_SIZE * 2) |
|
|
|
1: ld1 {v0.16b}, [x2], x3 // src |
|
|
|
1: ld1 {v0.16b}, [x2], x3 // src |
|
|
|
ushll v16.8h, v0.8b, #6 |
|
|
|
ushll v16.8h, v0.8b, #6 |
|
|
|
ushll2 v17.8h, v0.16b, #6 |
|
|
|
ushll2 v17.8h, v0.16b, #6 |
|
|
@ -320,7 +365,7 @@ function ff_hevc_put_hevc_pel_bi_pixels16_8_neon, export=1 |
|
|
|
endfunc |
|
|
|
endfunc |
|
|
|
|
|
|
|
|
|
|
|
function ff_hevc_put_hevc_pel_bi_pixels24_8_neon, export=1 |
|
|
|
function ff_hevc_put_hevc_pel_bi_pixels24_8_neon, export=1 |
|
|
|
mov x10, #(MAX_PB_SIZE * 2) |
|
|
|
mov x10, #(HEVC_MAX_PB_SIZE * 2) |
|
|
|
1: ld1 {v0.8b-v2.8b}, [x2], x3 // src |
|
|
|
1: ld1 {v0.8b-v2.8b}, [x2], x3 // src |
|
|
|
ushll v16.8h, v0.8b, #6 |
|
|
|
ushll v16.8h, v0.8b, #6 |
|
|
|
ushll v17.8h, v1.8b, #6 |
|
|
|
ushll v17.8h, v1.8b, #6 |
|
|
@ -339,7 +384,7 @@ function ff_hevc_put_hevc_pel_bi_pixels24_8_neon, export=1 |
|
|
|
endfunc |
|
|
|
endfunc |
|
|
|
|
|
|
|
|
|
|
|
function ff_hevc_put_hevc_pel_bi_pixels32_8_neon, export=1 |
|
|
|
function ff_hevc_put_hevc_pel_bi_pixels32_8_neon, export=1 |
|
|
|
mov x10, #(MAX_PB_SIZE * 2) |
|
|
|
mov x10, #(HEVC_MAX_PB_SIZE * 2) |
|
|
|
1: ld1 {v0.16b-v1.16b}, [x2], x3 // src |
|
|
|
1: ld1 {v0.16b-v1.16b}, [x2], x3 // src |
|
|
|
ushll v16.8h, v0.8b, #6 |
|
|
|
ushll v16.8h, v0.8b, #6 |
|
|
|
ushll2 v17.8h, v0.16b, #6 |
|
|
|
ushll2 v17.8h, v0.16b, #6 |
|
|
@ -361,7 +406,7 @@ function ff_hevc_put_hevc_pel_bi_pixels32_8_neon, export=1 |
|
|
|
endfunc |
|
|
|
endfunc |
|
|
|
|
|
|
|
|
|
|
|
function ff_hevc_put_hevc_pel_bi_pixels48_8_neon, export=1 |
|
|
|
function ff_hevc_put_hevc_pel_bi_pixels48_8_neon, export=1 |
|
|
|
mov x10, #(MAX_PB_SIZE) |
|
|
|
mov x10, #(HEVC_MAX_PB_SIZE) |
|
|
|
1: ld1 {v0.16b-v2.16b}, [x2], x3 // src |
|
|
|
1: ld1 {v0.16b-v2.16b}, [x2], x3 // src |
|
|
|
ushll v16.8h, v0.8b, #6 |
|
|
|
ushll v16.8h, v0.8b, #6 |
|
|
|
ushll2 v17.8h, v0.16b, #6 |
|
|
|
ushll2 v17.8h, v0.16b, #6 |
|
|
@ -369,7 +414,7 @@ function ff_hevc_put_hevc_pel_bi_pixels48_8_neon, export=1 |
|
|
|
ushll2 v19.8h, v1.16b, #6 |
|
|
|
ushll2 v19.8h, v1.16b, #6 |
|
|
|
ushll v20.8h, v2.8b, #6 |
|
|
|
ushll v20.8h, v2.8b, #6 |
|
|
|
ushll2 v21.8h, v2.16b, #6 |
|
|
|
ushll2 v21.8h, v2.16b, #6 |
|
|
|
ld1 {v24.8h-v27.8h}, [x4], #(MAX_PB_SIZE) // src2 |
|
|
|
ld1 {v24.8h-v27.8h}, [x4], #(HEVC_MAX_PB_SIZE) // src2 |
|
|
|
sqadd v16.8h, v16.8h, v24.8h |
|
|
|
sqadd v16.8h, v16.8h, v24.8h |
|
|
|
sqadd v17.8h, v17.8h, v25.8h |
|
|
|
sqadd v17.8h, v17.8h, v25.8h |
|
|
|
sqadd v18.8h, v18.8h, v26.8h |
|
|
|
sqadd v18.8h, v18.8h, v26.8h |
|
|
@ -399,12 +444,12 @@ function ff_hevc_put_hevc_pel_bi_pixels64_8_neon, export=1 |
|
|
|
ushll2 v21.8h, v2.16b, #6 |
|
|
|
ushll2 v21.8h, v2.16b, #6 |
|
|
|
ushll v22.8h, v3.8b, #6 |
|
|
|
ushll v22.8h, v3.8b, #6 |
|
|
|
ushll2 v23.8h, v3.16b, #6 |
|
|
|
ushll2 v23.8h, v3.16b, #6 |
|
|
|
ld1 {v24.8h, v25.8h, v26.8h, v27.8h}, [x4], #(MAX_PB_SIZE) // src2 |
|
|
|
ld1 {v24.8h, v25.8h, v26.8h, v27.8h}, [x4], #(HEVC_MAX_PB_SIZE) // src2 |
|
|
|
sqadd v16.8h, v16.8h, v24.8h |
|
|
|
sqadd v16.8h, v16.8h, v24.8h |
|
|
|
sqadd v17.8h, v17.8h, v25.8h |
|
|
|
sqadd v17.8h, v17.8h, v25.8h |
|
|
|
sqadd v18.8h, v18.8h, v26.8h |
|
|
|
sqadd v18.8h, v18.8h, v26.8h |
|
|
|
sqadd v19.8h, v19.8h, v27.8h |
|
|
|
sqadd v19.8h, v19.8h, v27.8h |
|
|
|
ld1 {v24.8h, v25.8h, v26.8h, v27.8h}, [x4], #(MAX_PB_SIZE) |
|
|
|
ld1 {v24.8h, v25.8h, v26.8h, v27.8h}, [x4], #(HEVC_MAX_PB_SIZE) |
|
|
|
sqadd v20.8h, v20.8h, v24.8h |
|
|
|
sqadd v20.8h, v20.8h, v24.8h |
|
|
|
sqadd v21.8h, v21.8h, v25.8h |
|
|
|
sqadd v21.8h, v21.8h, v25.8h |
|
|
|
sqadd v22.8h, v22.8h, v26.8h |
|
|
|
sqadd v22.8h, v22.8h, v26.8h |
|
|
@ -427,7 +472,7 @@ endfunc |
|
|
|
function ff_hevc_put_hevc_epel_bi_h4_8_neon, export=1 |
|
|
|
function ff_hevc_put_hevc_epel_bi_h4_8_neon, export=1 |
|
|
|
load_epel_filterb x6, x7 |
|
|
|
load_epel_filterb x6, x7 |
|
|
|
sub x2, x2, #1 |
|
|
|
sub x2, x2, #1 |
|
|
|
mov x10, #(MAX_PB_SIZE * 2) |
|
|
|
mov x10, #(HEVC_MAX_PB_SIZE * 2) |
|
|
|
1: ld1 {v4.8b}, [x2], x3 |
|
|
|
1: ld1 {v4.8b}, [x2], x3 |
|
|
|
ext v5.8b, v4.8b, v4.8b, #1 |
|
|
|
ext v5.8b, v4.8b, v4.8b, #1 |
|
|
|
ext v6.8b, v4.8b, v4.8b, #2 |
|
|
|
ext v6.8b, v4.8b, v4.8b, #2 |
|
|
@ -446,7 +491,7 @@ function ff_hevc_put_hevc_epel_bi_h6_8_neon, export=1 |
|
|
|
load_epel_filterb x6, x7 |
|
|
|
load_epel_filterb x6, x7 |
|
|
|
sub w1, w1, #4 |
|
|
|
sub w1, w1, #4 |
|
|
|
sub x2, x2, #1 |
|
|
|
sub x2, x2, #1 |
|
|
|
mov x10, #(MAX_PB_SIZE * 2) |
|
|
|
mov x10, #(HEVC_MAX_PB_SIZE * 2) |
|
|
|
1: ld1 {v24.16b}, [x2], x3 |
|
|
|
1: ld1 {v24.16b}, [x2], x3 |
|
|
|
ext v26.16b, v24.16b, v24.16b, #1 |
|
|
|
ext v26.16b, v24.16b, v24.16b, #1 |
|
|
|
ext v27.16b, v24.16b, v24.16b, #2 |
|
|
|
ext v27.16b, v24.16b, v24.16b, #2 |
|
|
@ -465,7 +510,7 @@ endfunc |
|
|
|
function ff_hevc_put_hevc_epel_bi_h8_8_neon, export=1 |
|
|
|
function ff_hevc_put_hevc_epel_bi_h8_8_neon, export=1 |
|
|
|
load_epel_filterb x6, x7 |
|
|
|
load_epel_filterb x6, x7 |
|
|
|
sub x2, x2, #1 |
|
|
|
sub x2, x2, #1 |
|
|
|
mov x10, #(MAX_PB_SIZE * 2) |
|
|
|
mov x10, #(HEVC_MAX_PB_SIZE * 2) |
|
|
|
1: ld1 {v24.16b}, [x2], x3 |
|
|
|
1: ld1 {v24.16b}, [x2], x3 |
|
|
|
ext v26.16b, v24.16b, v24.16b, #1 |
|
|
|
ext v26.16b, v24.16b, v24.16b, #1 |
|
|
|
ext v27.16b, v24.16b, v24.16b, #2 |
|
|
|
ext v27.16b, v24.16b, v24.16b, #2 |
|
|
@ -484,7 +529,7 @@ function ff_hevc_put_hevc_epel_bi_h12_8_neon, export=1 |
|
|
|
load_epel_filterb x6, x7 |
|
|
|
load_epel_filterb x6, x7 |
|
|
|
sub x1, x1, #8 |
|
|
|
sub x1, x1, #8 |
|
|
|
sub x2, x2, #1 |
|
|
|
sub x2, x2, #1 |
|
|
|
mov x10, #(MAX_PB_SIZE * 2) |
|
|
|
mov x10, #(HEVC_MAX_PB_SIZE * 2) |
|
|
|
1: ld1 {v24.16b}, [x2], x3 |
|
|
|
1: ld1 {v24.16b}, [x2], x3 |
|
|
|
ext v26.16b, v24.16b, v24.16b, #1 |
|
|
|
ext v26.16b, v24.16b, v24.16b, #1 |
|
|
|
ext v27.16b, v24.16b, v24.16b, #2 |
|
|
|
ext v27.16b, v24.16b, v24.16b, #2 |
|
|
@ -506,7 +551,7 @@ endfunc |
|
|
|
function ff_hevc_put_hevc_epel_bi_h16_8_neon, export=1 |
|
|
|
function ff_hevc_put_hevc_epel_bi_h16_8_neon, export=1 |
|
|
|
load_epel_filterb x6, x7 |
|
|
|
load_epel_filterb x6, x7 |
|
|
|
sub x2, x2, #1 |
|
|
|
sub x2, x2, #1 |
|
|
|
mov x10, #(MAX_PB_SIZE * 2) |
|
|
|
mov x10, #(HEVC_MAX_PB_SIZE * 2) |
|
|
|
1: ldr q24, [x2] |
|
|
|
1: ldr q24, [x2] |
|
|
|
ldr s25, [x2, #16] |
|
|
|
ldr s25, [x2, #16] |
|
|
|
add x2, x2, x3 |
|
|
|
add x2, x2, x3 |
|
|
@ -529,7 +574,7 @@ endfunc |
|
|
|
function ff_hevc_put_hevc_epel_bi_h24_8_neon, export=1 |
|
|
|
function ff_hevc_put_hevc_epel_bi_h24_8_neon, export=1 |
|
|
|
load_epel_filterb x6, x7 |
|
|
|
load_epel_filterb x6, x7 |
|
|
|
sub x2, x2, #1 |
|
|
|
sub x2, x2, #1 |
|
|
|
mov x10, #(MAX_PB_SIZE * 2) |
|
|
|
mov x10, #(HEVC_MAX_PB_SIZE * 2) |
|
|
|
1: ld1 {v24.16b, v25.16b}, [x2], x3 |
|
|
|
1: ld1 {v24.16b, v25.16b}, [x2], x3 |
|
|
|
ext v26.16b, v24.16b, v25.16b, #1 |
|
|
|
ext v26.16b, v24.16b, v25.16b, #1 |
|
|
|
ext v27.16b, v24.16b, v25.16b, #2 |
|
|
|
ext v27.16b, v24.16b, v25.16b, #2 |
|
|
@ -556,7 +601,7 @@ endfunc |
|
|
|
function ff_hevc_put_hevc_epel_bi_h32_8_neon, export=1 |
|
|
|
function ff_hevc_put_hevc_epel_bi_h32_8_neon, export=1 |
|
|
|
load_epel_filterb x6, x7 |
|
|
|
load_epel_filterb x6, x7 |
|
|
|
sub x2, x2, #1 |
|
|
|
sub x2, x2, #1 |
|
|
|
mov x10, #(MAX_PB_SIZE * 2) |
|
|
|
mov x10, #(HEVC_MAX_PB_SIZE * 2) |
|
|
|
1: ldp q24, q25, [x2] |
|
|
|
1: ldp q24, q25, [x2] |
|
|
|
ldr s26, [x2, #32] |
|
|
|
ldr s26, [x2, #32] |
|
|
|
add x2, x2, x3 |
|
|
|
add x2, x2, x3 |
|
|
@ -589,7 +634,7 @@ function ff_hevc_put_hevc_epel_bi_h48_8_neon, export=1 |
|
|
|
load_epel_filterb x6, x7 |
|
|
|
load_epel_filterb x6, x7 |
|
|
|
sub x2, x2, #1 |
|
|
|
sub x2, x2, #1 |
|
|
|
mov x7, #24 |
|
|
|
mov x7, #24 |
|
|
|
mov x10, #(MAX_PB_SIZE * 2 - 48) |
|
|
|
mov x10, #(HEVC_MAX_PB_SIZE * 2 - 48) |
|
|
|
1: ld1 {v24.16b, v25.16b, v26.16b}, [x2] |
|
|
|
1: ld1 {v24.16b, v25.16b, v26.16b}, [x2] |
|
|
|
ldr s27, [x2, #48] |
|
|
|
ldr s27, [x2, #48] |
|
|
|
add x2, x2, x3 |
|
|
|
add x2, x2, x3 |
|
|
@ -683,7 +728,7 @@ endfunc |
|
|
|
function ff_hevc_put_hevc_epel_bi_v4_8_neon, export=1 |
|
|
|
function ff_hevc_put_hevc_epel_bi_v4_8_neon, export=1 |
|
|
|
load_epel_filterb x7, x6 |
|
|
|
load_epel_filterb x7, x6 |
|
|
|
sub x2, x2, x3 |
|
|
|
sub x2, x2, x3 |
|
|
|
mov x10, #(MAX_PB_SIZE * 2) |
|
|
|
mov x10, #(HEVC_MAX_PB_SIZE * 2) |
|
|
|
ld1 {v16.s}[0], [x2], x3 |
|
|
|
ld1 {v16.s}[0], [x2], x3 |
|
|
|
ld1 {v17.s}[0], [x2], x3 |
|
|
|
ld1 {v17.s}[0], [x2], x3 |
|
|
|
ld1 {v18.s}[0], [x2], x3 |
|
|
|
ld1 {v18.s}[0], [x2], x3 |
|
|
@ -705,7 +750,7 @@ function ff_hevc_put_hevc_epel_bi_v6_8_neon, export=1 |
|
|
|
load_epel_filterb x7, x6 |
|
|
|
load_epel_filterb x7, x6 |
|
|
|
sub x2, x2, x3 |
|
|
|
sub x2, x2, x3 |
|
|
|
sub x1, x1, #4 |
|
|
|
sub x1, x1, #4 |
|
|
|
mov x10, #(MAX_PB_SIZE * 2) |
|
|
|
mov x10, #(HEVC_MAX_PB_SIZE * 2) |
|
|
|
ld1 {v16.8b}, [x2], x3 |
|
|
|
ld1 {v16.8b}, [x2], x3 |
|
|
|
ld1 {v17.8b}, [x2], x3 |
|
|
|
ld1 {v17.8b}, [x2], x3 |
|
|
|
ld1 {v18.8b}, [x2], x3 |
|
|
|
ld1 {v18.8b}, [x2], x3 |
|
|
@ -727,7 +772,7 @@ endfunc |
|
|
|
function ff_hevc_put_hevc_epel_bi_v8_8_neon, export=1 |
|
|
|
function ff_hevc_put_hevc_epel_bi_v8_8_neon, export=1 |
|
|
|
load_epel_filterb x7, x6 |
|
|
|
load_epel_filterb x7, x6 |
|
|
|
sub x2, x2, x3 |
|
|
|
sub x2, x2, x3 |
|
|
|
mov x10, #(MAX_PB_SIZE * 2) |
|
|
|
mov x10, #(HEVC_MAX_PB_SIZE * 2) |
|
|
|
ld1 {v16.8b}, [x2], x3 |
|
|
|
ld1 {v16.8b}, [x2], x3 |
|
|
|
ld1 {v17.8b}, [x2], x3 |
|
|
|
ld1 {v17.8b}, [x2], x3 |
|
|
|
ld1 {v18.8b}, [x2], x3 |
|
|
|
ld1 {v18.8b}, [x2], x3 |
|
|
@ -749,7 +794,7 @@ function ff_hevc_put_hevc_epel_bi_v12_8_neon, export=1 |
|
|
|
load_epel_filterb x7, x6 |
|
|
|
load_epel_filterb x7, x6 |
|
|
|
sub x1, x1, #8 |
|
|
|
sub x1, x1, #8 |
|
|
|
sub x2, x2, x3 |
|
|
|
sub x2, x2, x3 |
|
|
|
mov x10, #(MAX_PB_SIZE * 2) |
|
|
|
mov x10, #(HEVC_MAX_PB_SIZE * 2) |
|
|
|
ld1 {v16.16b}, [x2], x3 |
|
|
|
ld1 {v16.16b}, [x2], x3 |
|
|
|
ld1 {v17.16b}, [x2], x3 |
|
|
|
ld1 {v17.16b}, [x2], x3 |
|
|
|
ld1 {v18.16b}, [x2], x3 |
|
|
|
ld1 {v18.16b}, [x2], x3 |
|
|
@ -774,7 +819,7 @@ endfunc |
|
|
|
function ff_hevc_put_hevc_epel_bi_v16_8_neon, export=1 |
|
|
|
function ff_hevc_put_hevc_epel_bi_v16_8_neon, export=1 |
|
|
|
load_epel_filterb x7, x6 |
|
|
|
load_epel_filterb x7, x6 |
|
|
|
sub x2, x2, x3 |
|
|
|
sub x2, x2, x3 |
|
|
|
mov x10, #(MAX_PB_SIZE * 2) |
|
|
|
mov x10, #(HEVC_MAX_PB_SIZE * 2) |
|
|
|
ld1 {v16.16b}, [x2], x3 |
|
|
|
ld1 {v16.16b}, [x2], x3 |
|
|
|
ld1 {v17.16b}, [x2], x3 |
|
|
|
ld1 {v17.16b}, [x2], x3 |
|
|
|
ld1 {v18.16b}, [x2], x3 |
|
|
|
ld1 {v18.16b}, [x2], x3 |
|
|
@ -798,7 +843,7 @@ endfunc |
|
|
|
function ff_hevc_put_hevc_epel_bi_v24_8_neon, export=1 |
|
|
|
function ff_hevc_put_hevc_epel_bi_v24_8_neon, export=1 |
|
|
|
load_epel_filterb x7, x6 |
|
|
|
load_epel_filterb x7, x6 |
|
|
|
sub x2, x2, x3 |
|
|
|
sub x2, x2, x3 |
|
|
|
mov x10, #(MAX_PB_SIZE * 2) |
|
|
|
mov x10, #(HEVC_MAX_PB_SIZE * 2) |
|
|
|
ld1 {v16.8b, v17.8b, v18.8b}, [x2], x3 |
|
|
|
ld1 {v16.8b, v17.8b, v18.8b}, [x2], x3 |
|
|
|
ld1 {v19.8b, v20.8b, v21.8b}, [x2], x3 |
|
|
|
ld1 {v19.8b, v20.8b, v21.8b}, [x2], x3 |
|
|
|
ld1 {v22.8b, v23.8b, v24.8b}, [x2], x3 |
|
|
|
ld1 {v22.8b, v23.8b, v24.8b}, [x2], x3 |
|
|
@ -825,7 +870,7 @@ endfunc |
|
|
|
function ff_hevc_put_hevc_epel_bi_v32_8_neon, export=1 |
|
|
|
function ff_hevc_put_hevc_epel_bi_v32_8_neon, export=1 |
|
|
|
load_epel_filterb x7, x6 |
|
|
|
load_epel_filterb x7, x6 |
|
|
|
sub x2, x2, x3 |
|
|
|
sub x2, x2, x3 |
|
|
|
mov x10, #(MAX_PB_SIZE * 2) |
|
|
|
mov x10, #(HEVC_MAX_PB_SIZE * 2) |
|
|
|
ld1 {v16.16b, v17.16b}, [x2], x3 |
|
|
|
ld1 {v16.16b, v17.16b}, [x2], x3 |
|
|
|
ld1 {v18.16b, v19.16b}, [x2], x3 |
|
|
|
ld1 {v18.16b, v19.16b}, [x2], x3 |
|
|
|
ld1 {v20.16b, v21.16b}, [x2], x3 |
|
|
|
ld1 {v20.16b, v21.16b}, [x2], x3 |
|
|
@ -895,7 +940,7 @@ endfunc |
|
|
|
function ff_hevc_put_hevc_epel_v4_8_neon, export=1 |
|
|
|
function ff_hevc_put_hevc_epel_v4_8_neon, export=1 |
|
|
|
load_epel_filterb x5, x4 |
|
|
|
load_epel_filterb x5, x4 |
|
|
|
sub x1, x1, x2 |
|
|
|
sub x1, x1, x2 |
|
|
|
mov x10, #(MAX_PB_SIZE * 2) |
|
|
|
mov x10, #(HEVC_MAX_PB_SIZE * 2) |
|
|
|
ldr s16, [x1] |
|
|
|
ldr s16, [x1] |
|
|
|
ldr s17, [x1, x2] |
|
|
|
ldr s17, [x1, x2] |
|
|
|
add x1, x1, x2, lsl #1 |
|
|
|
add x1, x1, x2, lsl #1 |
|
|
@ -915,7 +960,7 @@ endfunc |
|
|
|
function ff_hevc_put_hevc_epel_v6_8_neon, export=1 |
|
|
|
function ff_hevc_put_hevc_epel_v6_8_neon, export=1 |
|
|
|
load_epel_filterb x5, x4 |
|
|
|
load_epel_filterb x5, x4 |
|
|
|
sub x1, x1, x2 |
|
|
|
sub x1, x1, x2 |
|
|
|
mov x10, #(MAX_PB_SIZE * 2 - 8) |
|
|
|
mov x10, #(HEVC_MAX_PB_SIZE * 2 - 8) |
|
|
|
ldr d16, [x1] |
|
|
|
ldr d16, [x1] |
|
|
|
ldr d17, [x1, x2] |
|
|
|
ldr d17, [x1, x2] |
|
|
|
add x1, x1, x2, lsl #1 |
|
|
|
add x1, x1, x2, lsl #1 |
|
|
@ -936,7 +981,7 @@ endfunc |
|
|
|
function ff_hevc_put_hevc_epel_v8_8_neon, export=1 |
|
|
|
function ff_hevc_put_hevc_epel_v8_8_neon, export=1 |
|
|
|
load_epel_filterb x5, x4 |
|
|
|
load_epel_filterb x5, x4 |
|
|
|
sub x1, x1, x2 |
|
|
|
sub x1, x1, x2 |
|
|
|
mov x10, #(MAX_PB_SIZE * 2) |
|
|
|
mov x10, #(HEVC_MAX_PB_SIZE * 2) |
|
|
|
ldr d16, [x1] |
|
|
|
ldr d16, [x1] |
|
|
|
ldr d17, [x1, x2] |
|
|
|
ldr d17, [x1, x2] |
|
|
|
add x1, x1, x2, lsl #1 |
|
|
|
add x1, x1, x2, lsl #1 |
|
|
@ -956,7 +1001,7 @@ endfunc |
|
|
|
function ff_hevc_put_hevc_epel_v12_8_neon, export=1 |
|
|
|
function ff_hevc_put_hevc_epel_v12_8_neon, export=1 |
|
|
|
load_epel_filterb x5, x4 |
|
|
|
load_epel_filterb x5, x4 |
|
|
|
sub x1, x1, x2 |
|
|
|
sub x1, x1, x2 |
|
|
|
mov x10, #(MAX_PB_SIZE * 2) |
|
|
|
mov x10, #(HEVC_MAX_PB_SIZE * 2) |
|
|
|
ldr q16, [x1] |
|
|
|
ldr q16, [x1] |
|
|
|
ldr q17, [x1, x2] |
|
|
|
ldr q17, [x1, x2] |
|
|
|
add x1, x1, x2, lsl #1 |
|
|
|
add x1, x1, x2, lsl #1 |
|
|
@ -980,7 +1025,7 @@ endfunc |
|
|
|
function ff_hevc_put_hevc_epel_v16_8_neon, export=1 |
|
|
|
function ff_hevc_put_hevc_epel_v16_8_neon, export=1 |
|
|
|
load_epel_filterb x5, x4 |
|
|
|
load_epel_filterb x5, x4 |
|
|
|
sub x1, x1, x2 |
|
|
|
sub x1, x1, x2 |
|
|
|
mov x10, #(MAX_PB_SIZE * 2) |
|
|
|
mov x10, #(HEVC_MAX_PB_SIZE * 2) |
|
|
|
ldr q16, [x1] |
|
|
|
ldr q16, [x1] |
|
|
|
ldr q17, [x1, x2] |
|
|
|
ldr q17, [x1, x2] |
|
|
|
add x1, x1, x2, lsl #1 |
|
|
|
add x1, x1, x2, lsl #1 |
|
|
@ -1002,7 +1047,7 @@ endfunc |
|
|
|
function ff_hevc_put_hevc_epel_v24_8_neon, export=1 |
|
|
|
function ff_hevc_put_hevc_epel_v24_8_neon, export=1 |
|
|
|
load_epel_filterb x5, x4 |
|
|
|
load_epel_filterb x5, x4 |
|
|
|
sub x1, x1, x2 |
|
|
|
sub x1, x1, x2 |
|
|
|
mov x10, #(MAX_PB_SIZE * 2) |
|
|
|
mov x10, #(HEVC_MAX_PB_SIZE * 2) |
|
|
|
ld1 {v16.8b, v17.8b, v18.8b}, [x1], x2 |
|
|
|
ld1 {v16.8b, v17.8b, v18.8b}, [x1], x2 |
|
|
|
ld1 {v19.8b, v20.8b, v21.8b}, [x1], x2 |
|
|
|
ld1 {v19.8b, v20.8b, v21.8b}, [x1], x2 |
|
|
|
ld1 {v22.8b, v23.8b, v24.8b}, [x1], x2 |
|
|
|
ld1 {v22.8b, v23.8b, v24.8b}, [x1], x2 |
|
|
@ -1025,7 +1070,7 @@ endfunc |
|
|
|
function ff_hevc_put_hevc_epel_v32_8_neon, export=1 |
|
|
|
function ff_hevc_put_hevc_epel_v32_8_neon, export=1 |
|
|
|
load_epel_filterb x5, x4 |
|
|
|
load_epel_filterb x5, x4 |
|
|
|
sub x1, x1, x2 |
|
|
|
sub x1, x1, x2 |
|
|
|
mov x10, #(MAX_PB_SIZE * 2) |
|
|
|
mov x10, #(HEVC_MAX_PB_SIZE * 2) |
|
|
|
ld1 {v16.16b, v17.16b}, [x1], x2 |
|
|
|
ld1 {v16.16b, v17.16b}, [x1], x2 |
|
|
|
ld1 {v18.16b, v19.16b}, [x1], x2 |
|
|
|
ld1 {v18.16b, v19.16b}, [x1], x2 |
|
|
|
ld1 {v20.16b, v21.16b}, [x1], x2 |
|
|
|
ld1 {v20.16b, v21.16b}, [x1], x2 |
|
|
@ -1327,7 +1372,7 @@ endfunc |
|
|
|
add x5, x5, x4, lsl #2 |
|
|
|
add x5, x5, x4, lsl #2 |
|
|
|
ld1r {v30.4s}, [x5] |
|
|
|
ld1r {v30.4s}, [x5] |
|
|
|
sub x1, x1, #1 |
|
|
|
sub x1, x1, #1 |
|
|
|
mov x10, #(MAX_PB_SIZE * 2) |
|
|
|
mov x10, #(HEVC_MAX_PB_SIZE * 2) |
|
|
|
.endm |
|
|
|
.endm |
|
|
|
|
|
|
|
|
|
|
|
function ff_hevc_put_hevc_epel_h4_8_neon, export=1 |
|
|
|
function ff_hevc_put_hevc_epel_h4_8_neon, export=1 |
|
|
@ -2179,7 +2224,7 @@ DISABLE_I8MM |
|
|
|
|
|
|
|
|
|
|
|
function hevc_put_hevc_epel_hv4_8_end_neon |
|
|
|
function hevc_put_hevc_epel_hv4_8_end_neon |
|
|
|
load_epel_filterh x5, x4 |
|
|
|
load_epel_filterh x5, x4 |
|
|
|
mov x10, #(MAX_PB_SIZE * 2) |
|
|
|
mov x10, #(HEVC_MAX_PB_SIZE * 2) |
|
|
|
ldr d16, [sp] |
|
|
|
ldr d16, [sp] |
|
|
|
ldr d17, [sp, x10] |
|
|
|
ldr d17, [sp, x10] |
|
|
|
add sp, sp, x10, lsl #1 |
|
|
|
add sp, sp, x10, lsl #1 |
|
|
@ -2198,7 +2243,7 @@ endfunc |
|
|
|
function hevc_put_hevc_epel_hv6_8_end_neon |
|
|
|
function hevc_put_hevc_epel_hv6_8_end_neon |
|
|
|
load_epel_filterh x5, x4 |
|
|
|
load_epel_filterh x5, x4 |
|
|
|
mov x5, #120 |
|
|
|
mov x5, #120 |
|
|
|
mov x10, #(MAX_PB_SIZE * 2) |
|
|
|
mov x10, #(HEVC_MAX_PB_SIZE * 2) |
|
|
|
ldr q16, [sp] |
|
|
|
ldr q16, [sp] |
|
|
|
ldr q17, [sp, x10] |
|
|
|
ldr q17, [sp, x10] |
|
|
|
add sp, sp, x10, lsl #1 |
|
|
|
add sp, sp, x10, lsl #1 |
|
|
@ -2218,7 +2263,7 @@ endfunc |
|
|
|
|
|
|
|
|
|
|
|
function hevc_put_hevc_epel_hv8_8_end_neon |
|
|
|
function hevc_put_hevc_epel_hv8_8_end_neon |
|
|
|
load_epel_filterh x5, x4 |
|
|
|
load_epel_filterh x5, x4 |
|
|
|
mov x10, #(MAX_PB_SIZE * 2) |
|
|
|
mov x10, #(HEVC_MAX_PB_SIZE * 2) |
|
|
|
ldr q16, [sp] |
|
|
|
ldr q16, [sp] |
|
|
|
ldr q17, [sp, x10] |
|
|
|
ldr q17, [sp, x10] |
|
|
|
add sp, sp, x10, lsl #1 |
|
|
|
add sp, sp, x10, lsl #1 |
|
|
@ -2238,7 +2283,7 @@ endfunc |
|
|
|
function hevc_put_hevc_epel_hv12_8_end_neon |
|
|
|
function hevc_put_hevc_epel_hv12_8_end_neon |
|
|
|
load_epel_filterh x5, x4 |
|
|
|
load_epel_filterh x5, x4 |
|
|
|
mov x5, #112 |
|
|
|
mov x5, #112 |
|
|
|
mov x10, #(MAX_PB_SIZE * 2) |
|
|
|
mov x10, #(HEVC_MAX_PB_SIZE * 2) |
|
|
|
ld1 {v16.8h, v17.8h}, [sp], x10 |
|
|
|
ld1 {v16.8h, v17.8h}, [sp], x10 |
|
|
|
ld1 {v18.8h, v19.8h}, [sp], x10 |
|
|
|
ld1 {v18.8h, v19.8h}, [sp], x10 |
|
|
|
ld1 {v20.8h, v21.8h}, [sp], x10 |
|
|
|
ld1 {v20.8h, v21.8h}, [sp], x10 |
|
|
@ -2258,7 +2303,7 @@ endfunc |
|
|
|
|
|
|
|
|
|
|
|
function hevc_put_hevc_epel_hv16_8_end_neon |
|
|
|
function hevc_put_hevc_epel_hv16_8_end_neon |
|
|
|
load_epel_filterh x5, x4 |
|
|
|
load_epel_filterh x5, x4 |
|
|
|
mov x10, #(MAX_PB_SIZE * 2) |
|
|
|
mov x10, #(HEVC_MAX_PB_SIZE * 2) |
|
|
|
ld1 {v16.8h, v17.8h}, [sp], x10 |
|
|
|
ld1 {v16.8h, v17.8h}, [sp], x10 |
|
|
|
ld1 {v18.8h, v19.8h}, [sp], x10 |
|
|
|
ld1 {v18.8h, v19.8h}, [sp], x10 |
|
|
|
ld1 {v20.8h, v21.8h}, [sp], x10 |
|
|
|
ld1 {v20.8h, v21.8h}, [sp], x10 |
|
|
@ -2278,7 +2323,7 @@ endfunc |
|
|
|
|
|
|
|
|
|
|
|
function hevc_put_hevc_epel_hv24_8_end_neon |
|
|
|
function hevc_put_hevc_epel_hv24_8_end_neon |
|
|
|
load_epel_filterh x5, x4 |
|
|
|
load_epel_filterh x5, x4 |
|
|
|
mov x10, #(MAX_PB_SIZE * 2) |
|
|
|
mov x10, #(HEVC_MAX_PB_SIZE * 2) |
|
|
|
ld1 {v16.8h, v17.8h, v18.8h}, [sp], x10 |
|
|
|
ld1 {v16.8h, v17.8h, v18.8h}, [sp], x10 |
|
|
|
ld1 {v19.8h, v20.8h, v21.8h}, [sp], x10 |
|
|
|
ld1 {v19.8h, v20.8h, v21.8h}, [sp], x10 |
|
|
|
ld1 {v22.8h, v23.8h, v24.8h}, [sp], x10 |
|
|
|
ld1 {v22.8h, v23.8h, v24.8h}, [sp], x10 |
|
|
@ -2462,7 +2507,7 @@ epel_hv neon |
|
|
|
|
|
|
|
|
|
|
|
function hevc_put_hevc_epel_uni_hv4_8_end_neon |
|
|
|
function hevc_put_hevc_epel_uni_hv4_8_end_neon |
|
|
|
load_epel_filterh x6, x5 |
|
|
|
load_epel_filterh x6, x5 |
|
|
|
mov x10, #(MAX_PB_SIZE * 2) |
|
|
|
mov x10, #(HEVC_MAX_PB_SIZE * 2) |
|
|
|
ld1 {v16.4h}, [sp], x10 |
|
|
|
ld1 {v16.4h}, [sp], x10 |
|
|
|
ld1 {v17.4h}, [sp], x10 |
|
|
|
ld1 {v17.4h}, [sp], x10 |
|
|
|
ld1 {v18.4h}, [sp], x10 |
|
|
|
ld1 {v18.4h}, [sp], x10 |
|
|
@ -2481,7 +2526,7 @@ endfunc |
|
|
|
function hevc_put_hevc_epel_uni_hv6_8_end_neon |
|
|
|
function hevc_put_hevc_epel_uni_hv6_8_end_neon |
|
|
|
load_epel_filterh x6, x5 |
|
|
|
load_epel_filterh x6, x5 |
|
|
|
sub x1, x1, #4 |
|
|
|
sub x1, x1, #4 |
|
|
|
mov x10, #(MAX_PB_SIZE * 2) |
|
|
|
mov x10, #(HEVC_MAX_PB_SIZE * 2) |
|
|
|
ld1 {v16.8h}, [sp], x10 |
|
|
|
ld1 {v16.8h}, [sp], x10 |
|
|
|
ld1 {v17.8h}, [sp], x10 |
|
|
|
ld1 {v17.8h}, [sp], x10 |
|
|
|
ld1 {v18.8h}, [sp], x10 |
|
|
|
ld1 {v18.8h}, [sp], x10 |
|
|
@ -2501,7 +2546,7 @@ endfunc |
|
|
|
|
|
|
|
|
|
|
|
function hevc_put_hevc_epel_uni_hv8_8_end_neon |
|
|
|
function hevc_put_hevc_epel_uni_hv8_8_end_neon |
|
|
|
load_epel_filterh x6, x5 |
|
|
|
load_epel_filterh x6, x5 |
|
|
|
mov x10, #(MAX_PB_SIZE * 2) |
|
|
|
mov x10, #(HEVC_MAX_PB_SIZE * 2) |
|
|
|
ld1 {v16.8h}, [sp], x10 |
|
|
|
ld1 {v16.8h}, [sp], x10 |
|
|
|
ld1 {v17.8h}, [sp], x10 |
|
|
|
ld1 {v17.8h}, [sp], x10 |
|
|
|
ld1 {v18.8h}, [sp], x10 |
|
|
|
ld1 {v18.8h}, [sp], x10 |
|
|
@ -2521,7 +2566,7 @@ endfunc |
|
|
|
function hevc_put_hevc_epel_uni_hv12_8_end_neon |
|
|
|
function hevc_put_hevc_epel_uni_hv12_8_end_neon |
|
|
|
load_epel_filterh x6, x5 |
|
|
|
load_epel_filterh x6, x5 |
|
|
|
sub x1, x1, #8 |
|
|
|
sub x1, x1, #8 |
|
|
|
mov x10, #(MAX_PB_SIZE * 2) |
|
|
|
mov x10, #(HEVC_MAX_PB_SIZE * 2) |
|
|
|
ld1 {v16.8h, v17.8h}, [sp], x10 |
|
|
|
ld1 {v16.8h, v17.8h}, [sp], x10 |
|
|
|
ld1 {v18.8h, v19.8h}, [sp], x10 |
|
|
|
ld1 {v18.8h, v19.8h}, [sp], x10 |
|
|
|
ld1 {v20.8h, v21.8h}, [sp], x10 |
|
|
|
ld1 {v20.8h, v21.8h}, [sp], x10 |
|
|
@ -2543,7 +2588,7 @@ endfunc |
|
|
|
|
|
|
|
|
|
|
|
function hevc_put_hevc_epel_uni_hv16_8_end_neon |
|
|
|
function hevc_put_hevc_epel_uni_hv16_8_end_neon |
|
|
|
load_epel_filterh x6, x5 |
|
|
|
load_epel_filterh x6, x5 |
|
|
|
mov x10, #(MAX_PB_SIZE * 2) |
|
|
|
mov x10, #(HEVC_MAX_PB_SIZE * 2) |
|
|
|
ld1 {v16.8h, v17.8h}, [sp], x10 |
|
|
|
ld1 {v16.8h, v17.8h}, [sp], x10 |
|
|
|
ld1 {v18.8h, v19.8h}, [sp], x10 |
|
|
|
ld1 {v18.8h, v19.8h}, [sp], x10 |
|
|
|
ld1 {v20.8h, v21.8h}, [sp], x10 |
|
|
|
ld1 {v20.8h, v21.8h}, [sp], x10 |
|
|
@ -2565,7 +2610,7 @@ endfunc |
|
|
|
|
|
|
|
|
|
|
|
function hevc_put_hevc_epel_uni_hv24_8_end_neon |
|
|
|
function hevc_put_hevc_epel_uni_hv24_8_end_neon |
|
|
|
load_epel_filterh x6, x5 |
|
|
|
load_epel_filterh x6, x5 |
|
|
|
mov x10, #(MAX_PB_SIZE * 2) |
|
|
|
mov x10, #(HEVC_MAX_PB_SIZE * 2) |
|
|
|
ld1 {v16.8h, v17.8h, v18.8h}, [sp], x10 |
|
|
|
ld1 {v16.8h, v17.8h, v18.8h}, [sp], x10 |
|
|
|
ld1 {v19.8h, v20.8h, v21.8h}, [sp], x10 |
|
|
|
ld1 {v19.8h, v20.8h, v21.8h}, [sp], x10 |
|
|
|
ld1 {v22.8h, v23.8h, v24.8h}, [sp], x10 |
|
|
|
ld1 {v22.8h, v23.8h, v24.8h}, [sp], x10 |
|
|
@ -3223,7 +3268,7 @@ DISABLE_I8MM |
|
|
|
|
|
|
|
|
|
|
|
function hevc_put_hevc_epel_uni_w_hv4_8_end_neon |
|
|
|
function hevc_put_hevc_epel_uni_w_hv4_8_end_neon |
|
|
|
load_epel_filterh x6, x5 |
|
|
|
load_epel_filterh x6, x5 |
|
|
|
mov x10, #(MAX_PB_SIZE * 2) |
|
|
|
mov x10, #(HEVC_MAX_PB_SIZE * 2) |
|
|
|
ld1 {v16.4h}, [sp], x10 |
|
|
|
ld1 {v16.4h}, [sp], x10 |
|
|
|
ld1 {v17.4h}, [sp], x10 |
|
|
|
ld1 {v17.4h}, [sp], x10 |
|
|
|
ld1 {v18.4h}, [sp], x10 |
|
|
|
ld1 {v18.4h}, [sp], x10 |
|
|
@ -3273,7 +3318,7 @@ endfunc |
|
|
|
function hevc_put_hevc_epel_uni_w_hv6_8_end_neon |
|
|
|
function hevc_put_hevc_epel_uni_w_hv6_8_end_neon |
|
|
|
load_epel_filterh x6, x5 |
|
|
|
load_epel_filterh x6, x5 |
|
|
|
sub x1, x1, #4 |
|
|
|
sub x1, x1, #4 |
|
|
|
mov x10, #(MAX_PB_SIZE * 2) |
|
|
|
mov x10, #(HEVC_MAX_PB_SIZE * 2) |
|
|
|
ld1 {v16.8h}, [sp], x10 |
|
|
|
ld1 {v16.8h}, [sp], x10 |
|
|
|
ld1 {v17.8h}, [sp], x10 |
|
|
|
ld1 {v17.8h}, [sp], x10 |
|
|
|
ld1 {v18.8h}, [sp], x10 |
|
|
|
ld1 {v18.8h}, [sp], x10 |
|
|
@ -3326,7 +3371,7 @@ endfunc |
|
|
|
|
|
|
|
|
|
|
|
function hevc_put_hevc_epel_uni_w_hv8_8_end_neon |
|
|
|
function hevc_put_hevc_epel_uni_w_hv8_8_end_neon |
|
|
|
load_epel_filterh x6, x5 |
|
|
|
load_epel_filterh x6, x5 |
|
|
|
mov x10, #(MAX_PB_SIZE * 2) |
|
|
|
mov x10, #(HEVC_MAX_PB_SIZE * 2) |
|
|
|
ld1 {v16.8h}, [sp], x10 |
|
|
|
ld1 {v16.8h}, [sp], x10 |
|
|
|
ld1 {v17.8h}, [sp], x10 |
|
|
|
ld1 {v17.8h}, [sp], x10 |
|
|
|
ld1 {v18.8h}, [sp], x10 |
|
|
|
ld1 {v18.8h}, [sp], x10 |
|
|
@ -3376,7 +3421,7 @@ endfunc |
|
|
|
function hevc_put_hevc_epel_uni_w_hv12_8_end_neon |
|
|
|
function hevc_put_hevc_epel_uni_w_hv12_8_end_neon |
|
|
|
load_epel_filterh x6, x5 |
|
|
|
load_epel_filterh x6, x5 |
|
|
|
sub x1, x1, #8 |
|
|
|
sub x1, x1, #8 |
|
|
|
mov x10, #(MAX_PB_SIZE * 2) |
|
|
|
mov x10, #(HEVC_MAX_PB_SIZE * 2) |
|
|
|
ld1 {v16.8h, v17.8h}, [sp], x10 |
|
|
|
ld1 {v16.8h, v17.8h}, [sp], x10 |
|
|
|
ld1 {v18.8h, v19.8h}, [sp], x10 |
|
|
|
ld1 {v18.8h, v19.8h}, [sp], x10 |
|
|
|
ld1 {v20.8h, v21.8h}, [sp], x10 |
|
|
|
ld1 {v20.8h, v21.8h}, [sp], x10 |
|
|
@ -3437,7 +3482,7 @@ endfunc |
|
|
|
|
|
|
|
|
|
|
|
function hevc_put_hevc_epel_uni_w_hv16_8_end_neon |
|
|
|
function hevc_put_hevc_epel_uni_w_hv16_8_end_neon |
|
|
|
load_epel_filterh x6, x5 |
|
|
|
load_epel_filterh x6, x5 |
|
|
|
mov x10, #(MAX_PB_SIZE * 2) |
|
|
|
mov x10, #(HEVC_MAX_PB_SIZE * 2) |
|
|
|
ld1 {v16.8h, v17.8h}, [sp], x10 |
|
|
|
ld1 {v16.8h, v17.8h}, [sp], x10 |
|
|
|
ld1 {v18.8h, v19.8h}, [sp], x10 |
|
|
|
ld1 {v18.8h, v19.8h}, [sp], x10 |
|
|
|
ld1 {v20.8h, v21.8h}, [sp], x10 |
|
|
|
ld1 {v20.8h, v21.8h}, [sp], x10 |
|
|
@ -3498,7 +3543,7 @@ endfunc |
|
|
|
|
|
|
|
|
|
|
|
function hevc_put_hevc_epel_uni_w_hv24_8_end_neon |
|
|
|
function hevc_put_hevc_epel_uni_w_hv24_8_end_neon |
|
|
|
load_epel_filterh x6, x5 |
|
|
|
load_epel_filterh x6, x5 |
|
|
|
mov x10, #(MAX_PB_SIZE * 2) |
|
|
|
mov x10, #(HEVC_MAX_PB_SIZE * 2) |
|
|
|
ld1 {v16.8h, v17.8h, v18.8h}, [sp], x10 |
|
|
|
ld1 {v16.8h, v17.8h, v18.8h}, [sp], x10 |
|
|
|
ld1 {v19.8h, v20.8h, v21.8h}, [sp], x10 |
|
|
|
ld1 {v19.8h, v20.8h, v21.8h}, [sp], x10 |
|
|
|
ld1 {v22.8h, v23.8h, v24.8h}, [sp], x10 |
|
|
|
ld1 {v22.8h, v23.8h, v24.8h}, [sp], x10 |
|
|
@ -3795,7 +3840,7 @@ epel_uni_w_hv neon |
|
|
|
|
|
|
|
|
|
|
|
function hevc_put_hevc_epel_bi_hv4_8_end_neon |
|
|
|
function hevc_put_hevc_epel_bi_hv4_8_end_neon |
|
|
|
load_epel_filterh x7, x6 |
|
|
|
load_epel_filterh x7, x6 |
|
|
|
mov x10, #(MAX_PB_SIZE * 2) |
|
|
|
mov x10, #(HEVC_MAX_PB_SIZE * 2) |
|
|
|
ld1 {v16.4h}, [sp], x10 |
|
|
|
ld1 {v16.4h}, [sp], x10 |
|
|
|
ld1 {v17.4h}, [sp], x10 |
|
|
|
ld1 {v17.4h}, [sp], x10 |
|
|
|
ld1 {v18.4h}, [sp], x10 |
|
|
|
ld1 {v18.4h}, [sp], x10 |
|
|
@ -3816,7 +3861,7 @@ endfunc |
|
|
|
function hevc_put_hevc_epel_bi_hv6_8_end_neon |
|
|
|
function hevc_put_hevc_epel_bi_hv6_8_end_neon |
|
|
|
load_epel_filterh x7, x6 |
|
|
|
load_epel_filterh x7, x6 |
|
|
|
sub x1, x1, #4 |
|
|
|
sub x1, x1, #4 |
|
|
|
mov x10, #(MAX_PB_SIZE * 2) |
|
|
|
mov x10, #(HEVC_MAX_PB_SIZE * 2) |
|
|
|
ld1 {v16.8h}, [sp], x10 |
|
|
|
ld1 {v16.8h}, [sp], x10 |
|
|
|
ld1 {v17.8h}, [sp], x10 |
|
|
|
ld1 {v17.8h}, [sp], x10 |
|
|
|
ld1 {v18.8h}, [sp], x10 |
|
|
|
ld1 {v18.8h}, [sp], x10 |
|
|
@ -3838,7 +3883,7 @@ endfunc |
|
|
|
|
|
|
|
|
|
|
|
function hevc_put_hevc_epel_bi_hv8_8_end_neon |
|
|
|
function hevc_put_hevc_epel_bi_hv8_8_end_neon |
|
|
|
load_epel_filterh x7, x6 |
|
|
|
load_epel_filterh x7, x6 |
|
|
|
mov x10, #(MAX_PB_SIZE * 2) |
|
|
|
mov x10, #(HEVC_MAX_PB_SIZE * 2) |
|
|
|
ld1 {v16.8h}, [sp], x10 |
|
|
|
ld1 {v16.8h}, [sp], x10 |
|
|
|
ld1 {v17.8h}, [sp], x10 |
|
|
|
ld1 {v17.8h}, [sp], x10 |
|
|
|
ld1 {v18.8h}, [sp], x10 |
|
|
|
ld1 {v18.8h}, [sp], x10 |
|
|
@ -3860,7 +3905,7 @@ endfunc |
|
|
|
function hevc_put_hevc_epel_bi_hv12_8_end_neon |
|
|
|
function hevc_put_hevc_epel_bi_hv12_8_end_neon |
|
|
|
load_epel_filterh x7, x6 |
|
|
|
load_epel_filterh x7, x6 |
|
|
|
sub x1, x1, #8 |
|
|
|
sub x1, x1, #8 |
|
|
|
mov x10, #(MAX_PB_SIZE * 2) |
|
|
|
mov x10, #(HEVC_MAX_PB_SIZE * 2) |
|
|
|
ld1 {v16.8h, v17.8h}, [sp], x10 |
|
|
|
ld1 {v16.8h, v17.8h}, [sp], x10 |
|
|
|
ld1 {v18.8h, v19.8h}, [sp], x10 |
|
|
|
ld1 {v18.8h, v19.8h}, [sp], x10 |
|
|
|
ld1 {v20.8h, v21.8h}, [sp], x10 |
|
|
|
ld1 {v20.8h, v21.8h}, [sp], x10 |
|
|
@ -3885,7 +3930,7 @@ endfunc |
|
|
|
|
|
|
|
|
|
|
|
function hevc_put_hevc_epel_bi_hv16_8_end_neon |
|
|
|
function hevc_put_hevc_epel_bi_hv16_8_end_neon |
|
|
|
load_epel_filterh x7, x6 |
|
|
|
load_epel_filterh x7, x6 |
|
|
|
mov x10, #(MAX_PB_SIZE * 2) |
|
|
|
mov x10, #(HEVC_MAX_PB_SIZE * 2) |
|
|
|
ld1 {v16.8h, v17.8h}, [sp], x10 |
|
|
|
ld1 {v16.8h, v17.8h}, [sp], x10 |
|
|
|
ld1 {v18.8h, v19.8h}, [sp], x10 |
|
|
|
ld1 {v18.8h, v19.8h}, [sp], x10 |
|
|
|
ld1 {v20.8h, v21.8h}, [sp], x10 |
|
|
|
ld1 {v20.8h, v21.8h}, [sp], x10 |
|
|
@ -3910,7 +3955,7 @@ endfunc |
|
|
|
|
|
|
|
|
|
|
|
function hevc_put_hevc_epel_bi_hv24_8_end_neon |
|
|
|
function hevc_put_hevc_epel_bi_hv24_8_end_neon |
|
|
|
load_epel_filterh x7, x6 |
|
|
|
load_epel_filterh x7, x6 |
|
|
|
mov x10, #(MAX_PB_SIZE * 2) |
|
|
|
mov x10, #(HEVC_MAX_PB_SIZE * 2) |
|
|
|
ld1 {v16.8h, v17.8h, v18.8h}, [sp], x10 |
|
|
|
ld1 {v16.8h, v17.8h, v18.8h}, [sp], x10 |
|
|
|
ld1 {v19.8h, v20.8h, v21.8h}, [sp], x10 |
|
|
|
ld1 {v19.8h, v20.8h, v21.8h}, [sp], x10 |
|
|
|
ld1 {v22.8h, v23.8h, v24.8h}, [sp], x10 |
|
|
|
ld1 {v22.8h, v23.8h, v24.8h}, [sp], x10 |
|
|
@ -3939,7 +3984,7 @@ endfunc |
|
|
|
|
|
|
|
|
|
|
|
function hevc_put_hevc_epel_bi_hv32_8_end_neon |
|
|
|
function hevc_put_hevc_epel_bi_hv32_8_end_neon |
|
|
|
load_epel_filterh x7, x6 |
|
|
|
load_epel_filterh x7, x6 |
|
|
|
mov x10, #(MAX_PB_SIZE * 2) |
|
|
|
mov x10, #(HEVC_MAX_PB_SIZE * 2) |
|
|
|
ld1 {v16.8h, v17.8h, v18.8h, v19.8h}, [sp], x10 |
|
|
|
ld1 {v16.8h, v17.8h, v18.8h, v19.8h}, [sp], x10 |
|
|
|
ld1 {v20.8h, v21.8h, v22.8h, v23.8h}, [sp], x10 |
|
|
|
ld1 {v20.8h, v21.8h, v22.8h, v23.8h}, [sp], x10 |
|
|
|
ld1 {v24.8h, v25.8h, v26.8h, v27.8h}, [sp], x10 |
|
|
|
ld1 {v24.8h, v25.8h, v26.8h, v27.8h}, [sp], x10 |
|
|
|