diracdsp: add dequantization SIMD

Currently unused, to be used in the following commits.

Signed-off-by: Rostislav Pehlivanov <rpehlivanov@obe.tv>
pull/224/head
Rostislav Pehlivanov 9 years ago committed by Rostislav Pehlivanov
parent 244d22452c
commit 80721cc1ff
  1. 24
      libavcodec/diracdsp.c
  2. 4
      libavcodec/diracdsp.h
  3. 37
      libavcodec/x86/diracdsp.asm
  4. 6
      libavcodec/x86/diracdsp_init.c

@ -189,6 +189,27 @@ static void add_rect_clamped_c(uint8_t *dst, const uint16_t *src, int stride,
} }
} }
/*
 * DEQUANT_SUBBAND(PX) defines dequant_subband_<PX>_c(), the C reference
 * dequantizer for a subband whose coefficients have type PX.
 *
 * For every coefficient c the result is
 *     0                                   if c == 0
 *     sign(c) * ((|c| * qf + qs) >> 2)    otherwise
 *
 *   src    - source coefficients, tot_v rows of tot_h elements stored
 *            back to back (the pointer advances tot_h * sizeof(PX) per row)
 *   dst    - destination, rows are `stride` bytes apart
 *   stride - byte distance between consecutive destination rows
 *   qf     - multiplier applied to the coefficient magnitude
 *   qs     - offset added after the multiplication
 *   tot_v  - number of rows
 *   tot_h  - number of coefficients per row
 *
 * The magnitude is computed in uint32_t so that the multiply/add wraps
 * modulo 2^32 and the shift is logical, instead of overflowing a signed int
 * (undefined behaviour). This is the same arithmetic the SSE4 version
 * performs with pabsd/pmulld/paddd/psrld/psignd.
 */
#define DEQUANT_SUBBAND(PX) \
static void dequant_subband_ ## PX ## _c(uint8_t *src, uint8_t *dst, ptrdiff_t stride, \
                                         const int qf, const int qs, int tot_v, int tot_h) \
{ \
    int x, y; \
    for (y = 0; y < tot_v; y++) { \
        const PX *src_r = (const PX *)src; \
        PX *dst_r = (PX *)dst; \
        for (x = 0; x < tot_h; x++) { \
            PX c = src_r[x]; \
            if (c) { \
                uint32_t mag = c < 0 ? -(uint32_t)c : (uint32_t)c; \
                mag = (mag * (uint32_t)qf + (uint32_t)qs) >> 2; \
                c = (PX)(c < 0 ? -mag : mag); \
            } \
            dst_r[x] = c; \
        } \
        src += tot_h * (ptrdiff_t)sizeof(PX); \
        dst += stride; \
    } \
}

DEQUANT_SUBBAND(int16_t)
DEQUANT_SUBBAND(int32_t)
#define PIXFUNC(PFX, WIDTH) \ #define PIXFUNC(PFX, WIDTH) \
c->PFX ## _dirac_pixels_tab[WIDTH>>4][0] = ff_ ## PFX ## _dirac_pixels ## WIDTH ## _c; \ c->PFX ## _dirac_pixels_tab[WIDTH>>4][0] = ff_ ## PFX ## _dirac_pixels ## WIDTH ## _c; \
c->PFX ## _dirac_pixels_tab[WIDTH>>4][1] = ff_ ## PFX ## _dirac_pixels ## WIDTH ## _l2_c; \ c->PFX ## _dirac_pixels_tab[WIDTH>>4][1] = ff_ ## PFX ## _dirac_pixels ## WIDTH ## _l2_c; \
@ -214,6 +235,9 @@ av_cold void ff_diracdsp_init(DiracDSPContext *c)
c->biweight_dirac_pixels_tab[1] = biweight_dirac_pixels16_c; c->biweight_dirac_pixels_tab[1] = biweight_dirac_pixels16_c;
c->biweight_dirac_pixels_tab[2] = biweight_dirac_pixels32_c; c->biweight_dirac_pixels_tab[2] = biweight_dirac_pixels32_c;
c->dequant_subband[0] = c->dequant_subband[2] = dequant_subband_int16_t_c;
c->dequant_subband[1] = c->dequant_subband[3] = dequant_subband_int32_t_c;
PIXFUNC(put, 8); PIXFUNC(put, 8);
PIXFUNC(put, 16); PIXFUNC(put, 16);
PIXFUNC(put, 32); PIXFUNC(put, 32);

@ -22,6 +22,7 @@
#define AVCODEC_DIRACDSP_H #define AVCODEC_DIRACDSP_H
#include <stdint.h> #include <stdint.h>
#include <stddef.h>
typedef void (*dirac_weight_func)(uint8_t *block, int stride, int log2_denom, int weight, int h); typedef void (*dirac_weight_func)(uint8_t *block, int stride, int log2_denom, int weight, int h);
typedef void (*dirac_biweight_func)(uint8_t *dst, const uint8_t *src, int stride, int log2_denom, int weightd, int weights, int h); typedef void (*dirac_biweight_func)(uint8_t *dst, const uint8_t *src, int stride, int log2_denom, int weightd, int weights, int h);
@ -46,6 +47,9 @@ typedef struct {
void (*add_rect_clamped)(uint8_t *dst/*align 16*/, const uint16_t *src/*align 16*/, int stride, const int16_t *idwt/*align 16*/, int idwt_stride, int width, int height/*mod 2*/); void (*add_rect_clamped)(uint8_t *dst/*align 16*/, const uint16_t *src/*align 16*/, int stride, const int16_t *idwt/*align 16*/, int idwt_stride, int width, int height/*mod 2*/);
void (*add_dirac_obmc[3])(uint16_t *dst, const uint8_t *src, int stride, const uint8_t *obmc_weight, int yblen); void (*add_dirac_obmc[3])(uint16_t *dst, const uint8_t *src, int stride, const uint8_t *obmc_weight, int yblen);
/* 0-1: int16_t and int32_t, asm or C; 2-3: int16_t and int32_t, C only */
void (*dequant_subband[4])(uint8_t *src, uint8_t *dst, ptrdiff_t stride, const int qf, const int qs, int tot_v, int tot_h);
dirac_weight_func weight_dirac_pixels_tab[3]; dirac_weight_func weight_dirac_pixels_tab[3];
dirac_biweight_func biweight_dirac_pixels_tab[3]; dirac_biweight_func biweight_dirac_pixels_tab[3];
} DiracDSPContext; } DiracDSPContext;

@ -263,3 +263,40 @@ ADD_RECT sse2
HPEL_FILTER sse2 HPEL_FILTER sse2
ADD_OBMC 32, sse2 ADD_OBMC 32, sse2
ADD_OBMC 16, sse2 ADD_OBMC 16, sse2
INIT_XMM sse4
; void dequant_subband_32(uint8_t *src, uint8_t *dst, ptrdiff_t stride, const int qf, const int qs, int tot_v, int tot_h)
;
; Dequantizes tot_v rows of 32-bit coefficients, four coefficients per
; inner-loop iteration:
;     dst[i] = sign(src[i]) * ((|src[i]| * qf + qs) >> 2),  0 stays 0
; src is consumed contiguously across rows; each dst row begins `stride`
; bytes after the previous one.
; NOTE(review): the inner loop always handles whole groups of 4 dwords, so it
; appears to assume tot_h is a multiple of 4 (otherwise it reads/writes past
; tot_h and src no longer lines up with the next row) -- confirm with callers.
cglobal dequant_subband_32, 7, 7, 4, src, dst, stride, qf, qs, tot_v, tot_h
movd m2, qfd
movd m3, qsd
; SPLATD is not defined in this hunk; presumably it broadcasts the low dword
; to every lane, giving m2 = {qf x4} and m3 = {qs x4} -- TODO confirm.
SPLATD m2
SPLATD m3
; qf/qs are now held in m2/m3, so r3/r4 (presumably the qf/qs argument
; registers under x86inc's in-order numbering) are reused as scratch.
mov r4, tot_hq ; r4 = saved row width, reloaded at the top of every row
mov r3, dstq ; r3 = start of the current dst row
.loop_v:
mov tot_hq, r4 ; reset the per-row element counter
mov dstq, r3 ; rewind dst to the start of this row
.loop_h:
movu m0, [srcq] ; m0 = 4 source coefficients (unaligned load)
pabsd m1, m0 ; m1 = |c|
pmulld m1, m2 ; m1 = |c| * qf (low 32 bits of each product)
paddd m1, m3 ; m1 += qs
psrld m1, 2 ; logical shift right by 2
psignd m1, m0 ; restore the sign of c; lanes where c == 0 become 0
movu [dstq], m1
add srcq, mmsize ; advance both pointers by one vector
add dstq, mmsize
sub tot_hd, 4 ; 4 coefficients done; loop while any remain
jg .loop_h
add r3, strideq ; next dst row starts stride bytes further on
dec tot_vd
jg .loop_v
RET

@ -46,6 +46,8 @@ void ff_put_rect_clamped_sse2(uint8_t *dst, int dst_stride, const int16_t *src,
void ff_put_signed_rect_clamped_mmx(uint8_t *dst, int dst_stride, const int16_t *src, int src_stride, int width, int height); void ff_put_signed_rect_clamped_mmx(uint8_t *dst, int dst_stride, const int16_t *src, int src_stride, int width, int height);
void ff_put_signed_rect_clamped_sse2(uint8_t *dst, int dst_stride, const int16_t *src, int src_stride, int width, int height); void ff_put_signed_rect_clamped_sse2(uint8_t *dst, int dst_stride, const int16_t *src, int src_stride, int width, int height);
void ff_dequant_subband_32_sse4(uint8_t *src, uint8_t *dst, ptrdiff_t stride, const int qf, const int qs, int tot_v, int tot_h);
#if HAVE_YASM #if HAVE_YASM
#define HPEL_FILTER(MMSIZE, EXT) \ #define HPEL_FILTER(MMSIZE, EXT) \
@ -184,4 +186,8 @@ void ff_diracdsp_init_x86(DiracDSPContext* c)
c->put_dirac_pixels_tab[2][0] = ff_put_dirac_pixels32_sse2; c->put_dirac_pixels_tab[2][0] = ff_put_dirac_pixels32_sse2;
c->avg_dirac_pixels_tab[2][0] = ff_avg_dirac_pixels32_sse2; c->avg_dirac_pixels_tab[2][0] = ff_avg_dirac_pixels32_sse2;
} }
if (EXTERNAL_SSE4(mm_flags)) {
c->dequant_subband[1] = ff_dequant_subband_32_sse4;
}
} }

Loading…
Cancel
Save