arm/aarch64: vp9lpf: Calculate !hev directly

Previously we first calculated hev, and then negated it.

Since we were already able to schedule the negation in the middle
of another calculation, we don't see a gain in every case.

Before:                     Cortex A7      A8      A9     A53  A53/AArch64
vp9_loop_filter_v_4_8_neon:     147.0   129.0   115.8    89.0         88.7
vp9_loop_filter_v_8_8_neon:     242.0   198.5   174.7   140.0        136.7
vp9_loop_filter_v_16_8_neon:    500.0   419.5   382.7   293.0        275.7
vp9_loop_filter_v_16_16_neon:   971.2   825.5   731.5   579.0        453.0
After:
vp9_loop_filter_v_4_8_neon:     143.0   127.7   114.8    88.0         87.7
vp9_loop_filter_v_8_8_neon:     241.0   197.2   173.7   140.0        136.7
vp9_loop_filter_v_16_8_neon:    497.0   419.5   379.7   293.0        275.7
vp9_loop_filter_v_16_16_neon:   965.2   818.7   731.4   579.0        452.0

Signed-off-by: Martin Storsjö <martin@martin.st>
pull/273/head
Martin Storsjö 8 years ago
parent 3fcf788fbb
commit e1f9de86f4
  1. 5
      libavcodec/aarch64/vp9lpf_neon.S
  2. 5
      libavcodec/arm/vp9lpf_neon.S

@@ -292,7 +292,7 @@
.if \mix != 0
sxtl v1.8h, v1.8b
.endif
cmhi v5\sz, v5\sz, v3\sz // hev
cmhs v5\sz, v3\sz, v5\sz // !hev
.if \wd == 8
// If a 4/8 or 8/4 mix is used, clear the relevant half of v6
.if \mix != 0
@@ -306,11 +306,10 @@
.elseif \wd == 8
bic v4\sz, v4\sz, v6\sz // fm && !flat8in
.endif
mvn v5\sz, v5\sz // !hev
and v5\sz, v5\sz, v4\sz // !hev && fm && !flat8in
.if \wd == 16
and v7\sz, v7\sz, v6\sz // flat8out && flat8in && fm
.endif
and v5\sz, v5\sz, v4\sz // !hev && fm && !flat8in
mul_sz \tmp3\().8h, \tmp4\().8h, \tmp3\().8h, \tmp4\().8h, \tmp5\().8h, \tmp5\().8h, \sz // 3 * (q0 - p0)
bic \tmp1\sz, \tmp1\sz, v5\sz // if (!hev) av_clip_int8 = 0

@@ -141,7 +141,7 @@
.if \wd == 8
vcle.u8 d6, d6, d0 @ flat8in
.endif
vcgt.u8 d5, d5, d3 @ hev
vcle.u8 d5, d5, d3 @ !hev
.if \wd == 8
vand d6, d6, d4 @ flat8in && fm
.endif
@@ -151,11 +151,10 @@
.elseif \wd == 8
vbic d4, d4, d6 @ fm && !flat8in
.endif
vmvn d5, d5 @ !hev
vand d5, d5, d4 @ !hev && fm && !flat8in
.if \wd == 16
vand d7, d7, d6 @ flat8out && flat8in && fm
.endif
vand d5, d5, d4 @ !hev && fm && !flat8in
vmul.s16 \tmpq2, \tmpq2, \tmpq3 @ 3 * (q0 - p0)
vbic \tmp1, \tmp1, d5 @ if (!hev) av_clip_int8 = 0

Loading…
Cancel
Save