avcodec/utvideodec: add SIMD for restore_rgb_planes

Signed-off-by: Paul B Mahol <onemda@gmail.com>
pull/264/head^2
Paul B Mahol 8 years ago
parent 3594788b71
commit 4ed7c2bbc3
  1. 2
      libavcodec/Makefile
  2. 2
      libavcodec/utvideo.h
  3. 53
      libavcodec/utvideodec.c
  4. 82
      libavcodec/utvideodsp.c
  5. 39
      libavcodec/utvideodsp.h
  6. 2
      libavcodec/x86/Makefile
  7. 103
      libavcodec/x86/utvideodsp.asm
  8. 43
      libavcodec/x86/utvideodsp_init.c

@ -583,7 +583,7 @@ OBJS-$(CONFIG_TTA_ENCODER) += ttaenc.o ttaencdsp.o ttadata.o
OBJS-$(CONFIG_TWINVQ_DECODER) += twinvqdec.o twinvq.o
OBJS-$(CONFIG_TXD_DECODER) += txd.o
OBJS-$(CONFIG_ULTI_DECODER) += ulti.o
OBJS-$(CONFIG_UTVIDEO_DECODER) += utvideodec.o utvideo.o
OBJS-$(CONFIG_UTVIDEO_DECODER) += utvideodec.o utvideo.o utvideodsp.o
OBJS-$(CONFIG_UTVIDEO_ENCODER) += utvideoenc.o utvideo.o
OBJS-$(CONFIG_V210_DECODER) += v210dec.o
OBJS-$(CONFIG_V210_ENCODER) += v210enc.o

@ -30,6 +30,7 @@
#include "libavutil/common.h"
#include "avcodec.h"
#include "bswapdsp.h"
#include "utvideodsp.h"
#include "lossless_videodsp.h"
#include "lossless_videoencdsp.h"
@ -66,6 +67,7 @@ extern const int ff_ut_pred_order[5];
typedef struct UtvideoContext {
const AVClass *class;
AVCodecContext *avctx;
UTVideoDSPContext utdsp;
BswapDSPContext bdsp;
LLVidDSPContext llviddsp;
LLVidEncDSPContext llvidencdsp;

@ -333,50 +333,6 @@ fail:
return AVERROR_INVALIDDATA;
}
/**
 * Rebuild the red and blue planes of an 8-bit planar frame in place.
 *
 * Every red and blue sample has the co-located green sample added to it and
 * 0x80 subtracted; the result is stored back as a byte (wrapping modulo 256).
 * The green plane is only read.
 *
 * @param frame  frame whose data[0]/data[1]/data[2] are used as the
 *               green/blue/red planes respectively
 * @param width  number of samples processed per row
 * @param height number of rows processed
 */
static void restore_rgb_planes(AVFrame *frame, int width, int height)
{
    uint8_t *row_g = (uint8_t *)frame->data[0];
    uint8_t *row_b = (uint8_t *)frame->data[1];
    uint8_t *row_r = (uint8_t *)frame->data[2];
    int x, y;

    for (y = 0; y < height; y++) {
        for (x = 0; x < width; x++) {
            /* Fetch all three samples before storing anything. */
            const uint8_t red   = row_r[x];
            const uint8_t green = row_g[x];
            const uint8_t blue  = row_b[x];

            row_r[x] = red  + green - 0x80;
            row_b[x] = blue + green - 0x80;
        }

        /* Step each plane to its next row using its own stride. */
        row_r += frame->linesize[2];
        row_g += frame->linesize[0];
        row_b += frame->linesize[1];
    }
}
/**
 * Rebuild the red and blue planes of a 16-bit-per-sample planar frame in
 * place, keeping only the low 10 bits of each result.
 *
 * Every red and blue sample has the co-located green sample added to it and
 * 0x200 subtracted; the sum is masked with 0x3FF before being stored.
 * The green plane is only read.
 *
 * @param frame  frame whose data[0]/data[1]/data[2] are used as the
 *               green/blue/red planes respectively
 * @param width  number of samples processed per row
 * @param height number of rows processed
 */
static void restore_rgb_planes10(AVFrame *frame, int width, int height)
{
    uint16_t *row_g = (uint16_t *)frame->data[0];
    uint16_t *row_b = (uint16_t *)frame->data[1];
    uint16_t *row_r = (uint16_t *)frame->data[2];
    int x, y;

    for (y = 0; y < height; y++) {
        for (x = 0; x < width; x++) {
            /* Work in int so the intermediate sum may go negative. */
            const int red   = row_r[x];
            const int green = row_g[x];
            const int blue  = row_b[x];

            row_r[x] = (red  + green - 0x200) & 0x3FF;
            row_b[x] = (blue + green - 0x200) & 0x3FF;
        }

        /* linesize is halved because the row pointers step in uint16_t units. */
        row_r += frame->linesize[2] / 2;
        row_g += frame->linesize[0] / 2;
        row_b += frame->linesize[1] / 2;
    }
}
#undef A
#undef B
#undef C
@ -696,7 +652,9 @@ static int decode_frame(AVCodecContext *avctx, void *data, int *got_frame,
}
}
}
restore_rgb_planes(frame.f, avctx->width, avctx->height);
c->utdsp.restore_rgb_planes(frame.f->data[2], frame.f->data[0], frame.f->data[1],
frame.f->linesize[2], frame.f->linesize[0], frame.f->linesize[1],
avctx->width, avctx->height);
break;
case AV_PIX_FMT_GBRAP10:
case AV_PIX_FMT_GBRP10:
@ -709,7 +667,9 @@ static int decode_frame(AVCodecContext *avctx, void *data, int *got_frame,
if (ret)
return ret;
}
restore_rgb_planes10(frame.f, avctx->width, avctx->height);
c->utdsp.restore_rgb_planes10((uint16_t *)frame.f->data[2], (uint16_t *)frame.f->data[0], (uint16_t *)frame.f->data[1],
frame.f->linesize[2] / 2, frame.f->linesize[0] / 2, frame.f->linesize[1] / 2,
avctx->width, avctx->height);
break;
case AV_PIX_FMT_YUV420P:
for (i = 0; i < 3; i++) {
@ -830,6 +790,7 @@ static av_cold int decode_init(AVCodecContext *avctx)
c->avctx = avctx;
ff_utvideodsp_init(&c->utdsp);
ff_bswapdsp_init(&c->bdsp);
ff_llviddsp_init(&c->llviddsp);

@ -0,0 +1,82 @@
/*
* This file is part of FFmpeg.
*
* FFmpeg is free software; you can redistribute it and/or
* modify it under the terms of the GNU Lesser General Public
* License as published by the Free Software Foundation; either
* version 2.1 of the License, or (at your option) any later version.
*
* FFmpeg is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
* Lesser General Public License for more details.
*
* You should have received a copy of the GNU Lesser General Public
* License along with FFmpeg; if not, write to the Free Software
* Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
*/
#include <stdint.h>
#include "config.h"
#include "libavutil/attributes.h"
#include "utvideodsp.h"
/**
 * Portable implementation of the 8-bit plane restore.
 *
 * For each of the first `width` samples of `height` rows, the red and blue
 * samples are replaced by (sample + green - 0x80), stored as a byte (wrapping
 * modulo 256). The green plane is only read.
 *
 * @param src_r      first row of the red plane (modified in place)
 * @param src_g      first row of the green plane (read only)
 * @param src_b      first row of the blue plane (modified in place)
 * @param linesize_r distance between consecutive red rows, in elements
 * @param linesize_g distance between consecutive green rows, in elements
 * @param linesize_b distance between consecutive blue rows, in elements
 * @param width      samples processed per row
 * @param height     rows processed
 */
static void restore_rgb_planes_c(uint8_t *src_r,
                                 uint8_t *src_g,
                                 uint8_t *src_b,
                                 ptrdiff_t linesize_r,
                                 ptrdiff_t linesize_g,
                                 ptrdiff_t linesize_b,
                                 int width, int height)
{
    int rows_left, x;

    for (rows_left = height; rows_left > 0; rows_left--) {
        for (x = 0; x < width; x++) {
            /* Fetch all three samples before storing anything. */
            const uint8_t red   = src_r[x];
            const uint8_t green = src_g[x];
            const uint8_t blue  = src_b[x];

            src_r[x] = red  + green - 0x80;
            src_b[x] = blue + green - 0x80;
        }

        /* Each plane advances by its own stride. */
        src_r += linesize_r;
        src_g += linesize_g;
        src_b += linesize_b;
    }
}
/**
 * Portable implementation of the 10-bit plane restore.
 *
 * For each of the first `width` samples of `height` rows, the red and blue
 * samples are replaced by ((sample + green - 0x200) & 0x3FF). The green plane
 * is only read.
 *
 * @param src_r      first row of the red plane (modified in place)
 * @param src_g      first row of the green plane (read only)
 * @param src_b      first row of the blue plane (modified in place)
 * @param linesize_r distance between consecutive red rows, in uint16_t elements
 * @param linesize_g distance between consecutive green rows, in uint16_t elements
 * @param linesize_b distance between consecutive blue rows, in uint16_t elements
 * @param width      samples processed per row
 * @param height     rows processed
 */
static void restore_rgb_planes10_c(uint16_t *src_r,
                                   uint16_t *src_g,
                                   uint16_t *src_b,
                                   ptrdiff_t linesize_r,
                                   ptrdiff_t linesize_g,
                                   ptrdiff_t linesize_b,
                                   int width, int height)
{
    int rows_left, x;

    for (rows_left = height; rows_left > 0; rows_left--) {
        for (x = 0; x < width; x++) {
            /* Work in int so the intermediate sum may go negative
             * before the mask folds it back into 10 bits. */
            const int red   = src_r[x];
            const int green = src_g[x];
            const int blue  = src_b[x];

            src_r[x] = (red  + green - 0x200) & 0x3FF;
            src_b[x] = (blue + green - 0x200) & 0x3FF;
        }

        /* Each plane advances by its own stride. */
        src_r += linesize_r;
        src_g += linesize_g;
        src_b += linesize_b;
    }
}
/**
 * Fill a UTVideoDSPContext with plane-restore implementations.
 *
 * The portable C routines are installed first; when building for x86 the
 * architecture-specific initializer is then given the chance to replace them.
 *
 * @param c context to populate
 */
av_cold void ff_utvideodsp_init(UTVideoDSPContext *c)
{
    /* Baseline: plain C versions. */
    c->restore_rgb_planes10 = restore_rgb_planes10_c;
    c->restore_rgb_planes   = restore_rgb_planes_c;

    /* ARCH_X86 is a build-time constant, so this call is only kept on x86. */
    if (ARCH_X86) {
        ff_utvideodsp_init_x86(c);
    }
}

@ -0,0 +1,39 @@
/*
* This file is part of FFmpeg.
*
* FFmpeg is free software; you can redistribute it and/or
* modify it under the terms of the GNU Lesser General Public
* License as published by the Free Software Foundation; either
* version 2.1 of the License, or (at your option) any later version.
*
* FFmpeg is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
* Lesser General Public License for more details.
*
* You should have received a copy of the GNU Lesser General Public
* License along with FFmpeg; if not, write to the Free Software
* Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
*/
#ifndef AVCODEC_UTVIDEODSP_H
#define AVCODEC_UTVIDEODSP_H
#include <stdint.h>
#include <stddef.h>
#include "libavutil/pixfmt.h"
#include "config.h"
/**
 * Function table for the UTVideo plane-restore routines.
 *
 * Both entries share the same parameter layout: the red, green and blue
 * plane pointers, one row stride per plane, and the width/height of the
 * region to process.
 */
typedef struct UTVideoDSPContext {
    /** Restore for planes of uint8_t samples. */
    void (*restore_rgb_planes)(uint8_t *src_r,
                               uint8_t *src_g,
                               uint8_t *src_b,
                               ptrdiff_t linesize_r,
                               ptrdiff_t linesize_g,
                               ptrdiff_t linesize_b,
                               int width,
                               int height);

    /** Restore for planes of uint16_t samples (10-bit variant). */
    void (*restore_rgb_planes10)(uint16_t *src_r,
                                 uint16_t *src_g,
                                 uint16_t *src_b,
                                 ptrdiff_t linesize_r,
                                 ptrdiff_t linesize_g,
                                 ptrdiff_t linesize_b,
                                 int width,
                                 int height);
} UTVideoDSPContext;
/**
 * Populate the context with the C implementations and, on x86 builds,
 * let ff_utvideodsp_init_x86() override them.
 */
void ff_utvideodsp_init(UTVideoDSPContext *c);
/**
 * x86-specific initializer; replaces entries of the context with
 * optimized versions when the build and the running CPU allow it.
 */
void ff_utvideodsp_init_x86(UTVideoDSPContext *c);
#endif /* AVCODEC_UTVIDEODSP_H */

@ -65,6 +65,7 @@ OBJS-$(CONFIG_TAK_DECODER) += x86/takdsp_init.o
OBJS-$(CONFIG_TRUEHD_DECODER) += x86/mlpdsp_init.o
OBJS-$(CONFIG_TTA_DECODER) += x86/ttadsp_init.o
OBJS-$(CONFIG_TTA_ENCODER) += x86/ttaencdsp_init.o
OBJS-$(CONFIG_UTVIDEO_DECODER) += x86/utvideodsp_init.o
OBJS-$(CONFIG_V210_DECODER) += x86/v210-init.o
OBJS-$(CONFIG_V210_ENCODER) += x86/v210enc_init.o
OBJS-$(CONFIG_VORBIS_DECODER) += x86/vorbisdsp_init.o
@ -171,6 +172,7 @@ X86ASM-OBJS-$(CONFIG_TAK_DECODER) += x86/takdsp.o
X86ASM-OBJS-$(CONFIG_TRUEHD_DECODER) += x86/mlpdsp.o
X86ASM-OBJS-$(CONFIG_TTA_DECODER) += x86/ttadsp.o
X86ASM-OBJS-$(CONFIG_TTA_ENCODER) += x86/ttaencdsp.o
X86ASM-OBJS-$(CONFIG_UTVIDEO_DECODER) += x86/utvideodsp.o
X86ASM-OBJS-$(CONFIG_V210_ENCODER) += x86/v210enc.o
X86ASM-OBJS-$(CONFIG_V210_DECODER) += x86/v210.o
X86ASM-OBJS-$(CONFIG_VORBIS_DECODER) += x86/vorbisdsp.o

@ -0,0 +1,103 @@
;******************************************************************************
;* SIMD-optimized UTVideo functions
;* Copyright (c) 2017 Paul B Mahol
;*
;* This file is part of FFmpeg.
;*
;* FFmpeg is free software; you can redistribute it and/or
;* modify it under the terms of the GNU Lesser General Public
;* License as published by the Free Software Foundation; either
;* version 2.1 of the License, or (at your option) any later version.
;*
;* FFmpeg is distributed in the hope that it will be useful,
;* but WITHOUT ANY WARRANTY; without even the implied warranty of
;* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
;* Lesser General Public License for more details.
;*
;* You should have received a copy of the GNU Lesser General Public
;* License along with FFmpeg; if not, write to the Free Software
;* Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
;******************************************************************************
%include "libavutil/x86/x86util.asm"
%if ARCH_X86_64
SECTION_RODATA
pb_128: times 16 db 128
pw_512: times 8 dw 512
pw_1023: times 8 dw 1023
SECTION .text
INIT_XMM sse2
; void restore_rgb_planes(uint8_t *src_r, uint8_t *src_g, uint8_t *src_b,
; ptrdiff_t linesize_r, ptrdiff_t linesize_g, ptrdiff_t linesize_b,
; int width, int height)
; In-place 8-bit restore: for every byte, r += g - 128 and b += g - 128,
; using wrapping byte arithmetic (psubb/paddb).
; Register budget as declared below (x86inc cglobal: args, GPRs, XMM regs):
; 8 arguments, 9 GPRs (the ninth, x, is the per-row index), 4 XMM registers.
; NOTE(review): rows are accessed with mova and in steps of mmsize, so each row
; is processed up to the next multiple of mmsize at or beyond width; this
; presumably relies on aligned, padded plane buffers - confirm against callers.
; NOTE(review): both loops test their condition at the bottom, so one row and
; one vector are processed even if height or width is 0.
cglobal restore_rgb_planes, 8,9,4, src_r, src_g, src_b, linesize_r, linesize_g, linesize_b, w, h, x
; widen the int width to a full-size register before using it in addresses
movsxdifnidn wq, wd
; point each plane at the end of its first row ...
add src_rq, wq
add src_gq, wq
add src_bq, wq
; ... and index with a negative offset that counts up towards zero
neg wq
; m3 = sixteen bytes of 128
mova m3, [pb_128]
.nextrow:
; restart the column index at -width for every row
mov xq, wq
.loop:
mova m0, [src_rq + xq]
mova m1, [src_gq + xq]
mova m2, [src_bq + xq]
; m1 = g - 128, shared by the red and blue updates
psubb m1, m3
paddb m0, m1
paddb m2, m1
; store red and blue back; green is left untouched
mova [src_rq+xq], m0
mova [src_bq+xq], m2
; advance one vector; keep looping while the index is still negative
add xq, mmsize
jl .loop
; step every plane to its next row
add src_rq, linesize_rq
add src_gq, linesize_gq
add src_bq, linesize_bq
; one row done; continue while rows remain
sub hd, 1
jg .nextrow
REP_RET
; In-place 10-bit restore on 16-bit words: r = (r + g - 512) & 1023 and
; b = (b + g - 512) & 1023.
; Register budget as declared below (x86inc cglobal: args, GPRs, XMM regs):
; 8 arguments, 9 GPRs (the ninth, x, is the per-row index), 5 XMM registers.
; Width and the three linesizes arrive in units of 16-bit elements and are
; doubled here to obtain byte offsets.
; NOTE(review): rows are accessed with mova and in steps of mmsize, so each row
; is processed up to the next multiple of mmsize at or beyond the row's byte
; width; this presumably relies on aligned, padded plane buffers - confirm.
; NOTE(review): both loops test their condition at the bottom, so one row and
; one vector are processed even if height or width is 0.
cglobal restore_rgb_planes10, 8,9,5, src_r, src_g, src_b, linesize_r, linesize_g, linesize_b, w, h, x
; width in elements -> width in bytes (32-bit shift; wq is used below, which
; relies on the 32-bit write clearing the upper half on x86-64)
shl wd, 1
; linesizes in elements -> linesizes in bytes
shl linesize_rq, 1
shl linesize_gq, 1
shl linesize_bq, 1
; point each plane at the end of its first row
add src_rq, wq
add src_gq, wq
add src_bq, wq
; m3 = eight words of 512, m4 = eight words of 1023 (10-bit mask)
mova m3, [pw_512]
mova m4, [pw_1023]
; negative byte offset that counts up towards zero
neg wq
.nextrow:
; restart the column index at -(row bytes) for every row
mov xq, wq
.loop:
mova m0, [src_rq + xq]
mova m1, [src_gq + xq]
mova m2, [src_bq + xq]
; m1 = g - 512, shared by the red and blue updates
psubw m1, m3
paddw m0, m1
paddw m2, m1
; keep only the low 10 bits of each result
pand m0, m4
pand m2, m4
; store red and blue back; green is left untouched
mova [src_rq+xq], m0
mova [src_bq+xq], m2
; advance one vector; keep looping while the index is still negative
add xq, mmsize
jl .loop
; step every plane to its next row (byte strides computed above)
add src_rq, linesize_rq
add src_gq, linesize_gq
add src_bq, linesize_bq
; one row done; continue while rows remain
sub hd, 1
jg .nextrow
REP_RET
%endif

@ -0,0 +1,43 @@
/*
* Copyright (c) 2017 Paul B Mahol
*
* This file is part of FFmpeg.
*
* FFmpeg is free software; you can redistribute it and/or
* modify it under the terms of the GNU Lesser General Public
* License as published by the Free Software Foundation; either
* version 2.1 of the License, or (at your option) any later version.
*
* FFmpeg is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
* Lesser General Public License for more details.
*
* You should have received a copy of the GNU Lesser General Public
* License along with FFmpeg; if not, write to the Free Software
* Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
*/
#include "config.h"
#include "libavutil/attributes.h"
#include "libavutil/cpu.h"
#include "libavutil/x86/asm.h"
#include "libavutil/x86/cpu.h"
#include "libavcodec/utvideodsp.h"
/* SSE2 routines implemented in x86/utvideodsp.asm; their signatures match the
 * corresponding UTVideoDSPContext function pointers. */
void ff_restore_rgb_planes_sse2(uint8_t *src_r, uint8_t *src_g, uint8_t *src_b,
ptrdiff_t linesize_r, ptrdiff_t linesize_g,
ptrdiff_t linesize_b, int width, int height);
void ff_restore_rgb_planes10_sse2(uint16_t *src_r, uint16_t *src_g, uint16_t *src_b,
ptrdiff_t linesize_r, ptrdiff_t linesize_g,
ptrdiff_t linesize_b, int width, int height);
/**
 * Install the SSE2 plane-restore routines when they can be used.
 *
 * The replacement only happens on x86-64 builds and only if the runtime
 * CPU flags pass the EXTERNAL_SSE2() check; otherwise the context is left
 * as the caller filled it.
 *
 * @param c context whose function pointers may be overridden
 */
av_cold void ff_utvideodsp_init_x86(UTVideoDSPContext *c)
{
    const int flags = av_get_cpu_flags();

    /* The build-time ARCH_X86_64 test comes first so the SSE2 symbols are
     * only referenced inside a branch that is constant-false elsewhere. */
    if (ARCH_X86_64 && EXTERNAL_SSE2(flags)) {
        c->restore_rgb_planes10 = ff_restore_rgb_planes10_sse2;
        c->restore_rgb_planes   = ff_restore_rgb_planes_sse2;
    }
}
Loading…
Cancel
Save