|
|
@@ -40,8 +40,8 @@ function ff_nv24_to_yuv420p_chroma_neon, export=1 |
|
|
|
mov w10, w6 // w10 = w |
|
|
|
mov w10, w6 // w10 = w |
|
|
|
|
|
|
|
|
|
|
|
2: |
|
|
|
2: |
|
|
|
ld2 { v0.16b, v1.16b }, [x4], #32 // v0 = U1, v1 = V1 |
|
|
|
ld2 {v0.16b, v1.16b}, [x4], #32 // v0 = U1, v1 = V1 |
|
|
|
ld2 { v2.16b, v3.16b }, [x9], #32 // v2 = U2, v3 = V2 |
|
|
|
ld2 {v2.16b, v3.16b}, [x9], #32 // v2 = U2, v3 = V2 |
|
|
|
|
|
|
|
|
|
|
|
uaddlp v0.8h, v0.16b // pairwise add U1 into v0 |
|
|
|
uaddlp v0.8h, v0.16b // pairwise add U1 into v0 |
|
|
|
uaddlp v1.8h, v1.16b // pairwise add V1 into v1 |
|
|
|
uaddlp v1.8h, v1.16b // pairwise add V1 into v1 |
|
|
@@ -51,8 +51,8 @@ function ff_nv24_to_yuv420p_chroma_neon, export=1 |
|
|
|
shrn v0.8b, v0.8h, #2 // divide by 4 |
|
|
|
shrn v0.8b, v0.8h, #2 // divide by 4 |
|
|
|
shrn v1.8b, v1.8h, #2 // divide by 4 |
|
|
|
shrn v1.8b, v1.8h, #2 // divide by 4 |
|
|
|
|
|
|
|
|
|
|
|
st1 { v0.8b }, [x0], #8 // store U into dst1 |
|
|
|
st1 {v0.8b}, [x0], #8 // store U into dst1 |
|
|
|
st1 { v1.8b }, [x2], #8 // store V into dst2 |
|
|
|
st1 {v1.8b}, [x2], #8 // store V into dst2 |
|
|
|
|
|
|
|
|
|
|
|
subs w10, w10, #8 |
|
|
|
subs w10, w10, #8 |
|
|
|
b.gt 2b |
|
|
|
b.gt 2b |
|
|
|