From e280fe13291e9c712a5f4aa13b5263f3e8afed45 Mon Sep 17 00:00:00 2001 From: Luca Barbato Date: Thu, 28 Jan 2016 01:01:46 +0100 Subject: [PATCH] v210: Use separate sample_factors The 10-bit and the 8-bit functions can now be implemented to process a different number of samples. While at it, simplify the code a little. --- libavcodec/v210enc.c | 19 ++++++++++++++----- libavcodec/v210enc.h | 3 ++- libavcodec/x86/v210enc_init.c | 7 ++++--- 3 files changed, 20 insertions(+), 9 deletions(-) diff --git a/libavcodec/v210enc.c b/libavcodec/v210enc.c index da0b23f074..51c182c6a8 100644 --- a/libavcodec/v210enc.c +++ b/libavcodec/v210enc.c @@ -86,7 +86,8 @@ av_cold void ff_v210enc_init(V210EncContext *s) { s->pack_line_8 = v210_planar_pack_8_c; s->pack_line_10 = v210_planar_pack_10_c; - s->sample_factor = 1; + s->sample_factor_8 = 1; + s->sample_factor_10 = 1; if (ARCH_X86) ff_v210enc_init_x86(s); @@ -133,15 +134,19 @@ static int encode_frame(AVCodecContext *avctx, AVPacket *pkt, const uint16_t *y = (const uint16_t *)pic->data[0]; const uint16_t *u = (const uint16_t *)pic->data[1]; const uint16_t *v = (const uint16_t *)pic->data[2]; + + const int sample_size = 6 * s->sample_factor_10; + const int sample_w = avctx->width / sample_size; + for (h = 0; h < avctx->height; h++) { uint32_t val; - w = (avctx->width / (6 * s->sample_factor)) * 6 * s->sample_factor; + w = sample_w * sample_size; s->pack_line_10(y, u, v, dst, w); y += w; u += w >> 1; v += w >> 1; - dst += (w / (6 * s->sample_factor)) * 16 * s->sample_factor; + dst += sample_w * 16 * s->sample_factor_10; for (; w < avctx->width - 5; w += 6) { WRITE_PIXELS(u, y, v); @@ -178,15 +183,19 @@ static int encode_frame(AVCodecContext *avctx, AVPacket *pkt, const uint8_t *y = pic->data[0]; const uint8_t *u = pic->data[1]; const uint8_t *v = pic->data[2]; + + const int sample_size = 12 * s->sample_factor_8; + const int sample_w = avctx->width / sample_size; + for (h = 0; h < avctx->height; h++) { uint32_t val; - w = (avctx->width 
/ (12 * s->sample_factor)) * 12 * s->sample_factor; + w = sample_w * sample_size; s->pack_line_8(y, u, v, dst, w); y += w; u += w >> 1; v += w >> 1; - dst += (w / (12 * s->sample_factor)) * 32 * s->sample_factor; + dst += sample_w * 32 * s->sample_factor_8; for (; w < avctx->width - 5; w += 6) { WRITE_PIXELS8(u, y, v); diff --git a/libavcodec/v210enc.h b/libavcodec/v210enc.h index 74b0514f2e..ee3637a584 100644 --- a/libavcodec/v210enc.h +++ b/libavcodec/v210enc.h @@ -28,7 +28,8 @@ typedef struct V210EncContext { const uint8_t *v, uint8_t *dst, ptrdiff_t width); void (*pack_line_10)(const uint16_t *y, const uint16_t *u, const uint16_t *v, uint8_t *dst, ptrdiff_t width); - int sample_factor; + int sample_factor_8; + int sample_factor_10; } V210EncContext; void ff_v210enc_init(V210EncContext *s); diff --git a/libavcodec/x86/v210enc_init.c b/libavcodec/x86/v210enc_init.c index 33f2e4113f..c4d2745b6f 100644 --- a/libavcodec/x86/v210enc_init.c +++ b/libavcodec/x86/v210enc_init.c @@ -46,8 +46,9 @@ av_cold void ff_v210enc_init_x86(V210EncContext *s) s->pack_line_8 = ff_v210_planar_pack_8_avx; if (EXTERNAL_AVX2(cpu_flags)) { - s->sample_factor = 2; - s->pack_line_8 = ff_v210_planar_pack_8_avx2; - s->pack_line_10 = ff_v210_planar_pack_10_avx2; + s->sample_factor_8 = 2; + s->pack_line_8 = ff_v210_planar_pack_8_avx2; + s->sample_factor_10 = 2; + s->pack_line_10 = ff_v210_planar_pack_10_avx2; } }