avcodec/mips: MSA (MIPS-SIMD-Arch) optimizations for HEVC intra prediction functions

This patch adds MSA (MIPS-SIMD-Arch) optimizations for the HEVC intra prediction functions in the new file hevcpred_msa.c.
It also adds new generic macros (needed for this patch) to libavutil/mips/generic_macros_msa.h.

Signed-off-by: Shivraj Patil <shivraj.patil@imgtec.com>
Signed-off-by: Michael Niedermayer <michaelni@gmx.at>
pull/150/head
Shivraj Patil 10 years ago committed by Michael Niedermayer
parent 271195f85b
commit d6d98237ed
  1. 3
      libavcodec/hevcpred.c
  2. 1
      libavcodec/hevcpred.h
  3. 6
      libavcodec/mips/Makefile
  4. 48
      libavcodec/mips/hevcpred_init_mips.c
  5. 73
      libavcodec/mips/hevcpred_mips.h
  6. 3084
      libavcodec/mips/hevcpred_msa.c
  7. 46
      libavutil/mips/generic_macros_msa.h

@ -74,4 +74,7 @@ void ff_hevc_pred_init(HEVCPredContext *hpc, int bit_depth)
HEVC_PRED(8);
break;
}
if (ARCH_MIPS)
ff_hevc_pred_init_mips(hpc, bit_depth);
}

@ -41,5 +41,6 @@ typedef struct HEVCPredContext {
} HEVCPredContext;
void ff_hevc_pred_init(HEVCPredContext *hpc, int bit_depth);
void ff_hevc_pred_init_mips(HEVCPredContext *hpc, int bit_depth);
#endif /* AVCODEC_HEVCPRED_H */

@ -18,7 +18,8 @@ OBJS-$(CONFIG_AAC_DECODER) += mips/aacdec_mips.o \
mips/aacpsdsp_mips.o
MIPSDSPR1-OBJS-$(CONFIG_AAC_ENCODER) += mips/aaccoder_mips.o
MIPSFPU-OBJS-$(CONFIG_AAC_ENCODER) += mips/iirfilter_mips.o
OBJS-$(CONFIG_HEVC_DECODER) += mips/hevcdsp_init_mips.o
OBJS-$(CONFIG_HEVC_DECODER) += mips/hevcdsp_init_mips.o \
mips/hevcpred_init_mips.o
OBJS-$(CONFIG_H264DSP) += mips/h264dsp_init_mips.o
OBJS-$(CONFIG_H264CHROMA) += mips/h264chroma_init_mips.o
MSA-OBJS-$(CONFIG_HEVC_DECODER) += mips/hevcdsp_msa.o \
@ -27,7 +28,8 @@ MSA-OBJS-$(CONFIG_HEVC_DECODER) += mips/hevcdsp_msa.o \
mips/hevc_mc_bi_msa.o \
mips/hevc_mc_biw_msa.o \
mips/hevc_idct_msa.o \
mips/hevc_lpf_sao_msa.o
mips/hevc_lpf_sao_msa.o \
mips/hevcpred_msa.o
MSA-OBJS-$(CONFIG_H264DSP) += mips/h264dsp_msa.o
LOONGSON3-OBJS-$(CONFIG_H264DSP) += mips/h264dsp_mmi.o
LOONGSON3-OBJS-$(CONFIG_H264CHROMA) += mips/h264chroma_mmi.o

@ -0,0 +1,48 @@
/*
* Copyright (c) 2015 Shivraj Patil (Shivraj.Patil@imgtec.com)
*
* This file is part of FFmpeg.
*
* FFmpeg is free software; you can redistribute it and/or
* modify it under the terms of the GNU Lesser General Public
* License as published by the Free Software Foundation; either
* version 2.1 of the License, or (at your option) any later version.
*
* FFmpeg is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
* Lesser General Public License for more details.
*
* You should have received a copy of the GNU Lesser General Public
* License along with FFmpeg; if not, write to the Free Software
* Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
*/
#include "libavcodec/hevc.h"
#include "libavcodec/mips/hevcpred_mips.h"
#if HAVE_MSA
/**
 * Install the MSA implementations of the HEVC intra prediction routines
 * into the prediction context.
 *
 * Only 8-bit content is handled; for every other bit depth the context is
 * returned untouched.
 *
 * @param c         prediction context whose function pointers are overridden
 * @param bit_depth bit depth of the stream being decoded
 */
static av_cold void hevc_pred_init_msa(HEVCPredContext *c, const int bit_depth)
{
    if (bit_depth != 8) {
        return;
    }

    /* Whole-block prediction: only slots 2 and 3 are replaced (the 16x16
     * and 32x32 variants, going by the function names); slots 0 and 1 are
     * not written here. */
    c->intra_pred[2] = ff_intra_pred_8_16x16_msa;
    c->intra_pred[3] = ff_intra_pred_8_32x32_msa;

    /* Planar prediction, all four slots. */
    c->pred_planar[0] = ff_hevc_intra_pred_planar_0_msa;
    c->pred_planar[1] = ff_hevc_intra_pred_planar_1_msa;
    c->pred_planar[2] = ff_hevc_intra_pred_planar_2_msa;
    c->pred_planar[3] = ff_hevc_intra_pred_planar_3_msa;

    /* DC prediction uses a single entry point for every size. */
    c->pred_dc = ff_hevc_intra_pred_dc_msa;

    /* Angular prediction, all four slots. */
    c->pred_angular[0] = ff_pred_intra_pred_angular_0_msa;
    c->pred_angular[1] = ff_pred_intra_pred_angular_1_msa;
    c->pred_angular[2] = ff_pred_intra_pred_angular_2_msa;
    c->pred_angular[3] = ff_pred_intra_pred_angular_3_msa;
}
#endif // #if HAVE_MSA
/**
 * MIPS-specific hook for HEVC intra prediction setup.
 *
 * Forwards to hevc_pred_init_msa() when the build has HAVE_MSA enabled.
 * Without MSA the body is empty, so the context is left exactly as the
 * caller prepared it.
 *
 * @param c         prediction context to update
 * @param bit_depth bit depth of the stream being decoded
 */
void ff_hevc_pred_init_mips(HEVCPredContext *c, const int bit_depth)
{
#if HAVE_MSA
hevc_pred_init_msa(c, bit_depth);
#endif // #if HAVE_MSA
}

@ -0,0 +1,73 @@
/*
* Copyright (c) 2015 Shivraj Patil (Shivraj.Patil@imgtec.com)
*
* This file is part of FFmpeg.
*
* FFmpeg is free software; you can redistribute it and/or
* modify it under the terms of the GNU Lesser General Public
* License as published by the Free Software Foundation; either
* version 2.1 of the License, or (at your option) any later version.
*
* FFmpeg is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
* Lesser General Public License for more details.
*
* You should have received a copy of the GNU Lesser General Public
* License along with FFmpeg; if not, write to the Free Software
* Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
*/
#ifndef AVCODEC_MIPS_HEVCPRED_MIPS_H
#define AVCODEC_MIPS_HEVCPRED_MIPS_H
#include "libavcodec/hevcdsp.h"
/* Planar intra prediction (MSA).
 * Each routine writes into 'dst' (row pitch 'stride') from the 'src_top'
 * and 'src_left' reference samples. The numeric suffix is the
 * pred_planar[] slot the routine is installed into by the MSA init code.
 * NOTE(review): the slot presumably selects the block size - confirm
 * against hevcpred_msa.c. */
void ff_hevc_intra_pred_planar_0_msa(uint8_t *dst,
const uint8_t *src_top,
const uint8_t *src_left,
ptrdiff_t stride);
void ff_hevc_intra_pred_planar_1_msa(uint8_t *dst,
const uint8_t *src_top,
const uint8_t *src_left,
ptrdiff_t stride);
void ff_hevc_intra_pred_planar_2_msa(uint8_t *dst,
const uint8_t *src_top,
const uint8_t *src_left,
ptrdiff_t stride);
void ff_hevc_intra_pred_planar_3_msa(uint8_t *dst,
const uint8_t *src_top,
const uint8_t *src_left,
ptrdiff_t stride);
/* DC intra prediction (MSA). A single entry point; the block size is
 * passed at run time through 'log2'.
 * NOTE(review): 'c_idx' presumably identifies the colour component -
 * confirm against the generic implementation. */
void ff_hevc_intra_pred_dc_msa(uint8_t *dst, const uint8_t *src_top,
const uint8_t *src_left,
ptrdiff_t stride, int log2, int c_idx);
/* Angular intra prediction (MSA). The numeric suffix is the
 * pred_angular[] slot; 'mode' selects the prediction mode.
 * NOTE(review): these use the ff_pred_ prefix while the planar/DC
 * routines use ff_hevc_ - naming is inconsistent but left as is. */
void ff_pred_intra_pred_angular_0_msa(uint8_t *dst,
const uint8_t *src_top,
const uint8_t *src_left,
ptrdiff_t stride, int c_idx, int mode);
void ff_pred_intra_pred_angular_1_msa(uint8_t *dst,
const uint8_t *src_top,
const uint8_t *src_left,
ptrdiff_t stride, int c_idx, int mode);
void ff_pred_intra_pred_angular_2_msa(uint8_t *dst,
const uint8_t *src_top,
const uint8_t *src_left,
ptrdiff_t stride, int c_idx, int mode);
void ff_pred_intra_pred_angular_3_msa(uint8_t *dst,
const uint8_t *src_top,
const uint8_t *src_left,
ptrdiff_t stride, int c_idx, int mode);
/* Whole-block intra prediction entry points operating on the decoder
 * context at position (x0, y0); installed into intra_pred[2] and
 * intra_pred[3] by the MSA init code.
 * NOTE(review): HEVCContext does not appear to be declared by hevcdsp.h;
 * includers seem to rely on libavcodec/hevc.h being included first (as
 * hevcpred_init_mips.c does) - confirm. */
void ff_intra_pred_8_16x16_msa(HEVCContext *s, int x0, int y0, int c_idx);
void ff_intra_pred_8_32x32_msa(HEVCContext *s, int x0, int y0, int c_idx);
#endif // #ifndef AVCODEC_MIPS_HEVCPRED_MIPS_H

File diff suppressed because it is too large Load Diff

@ -770,7 +770,9 @@
SLDI_B2_0(RTYPE, in0, in1, out0, out1, slide_val); \
SLDI_B2_0(RTYPE, in2, in3, out2, out3, slide_val); \
}
#define SLDI_B4_0_UB(...) SLDI_B4_0(v16u8, __VA_ARGS__)
#define SLDI_B4_0_SB(...) SLDI_B4_0(v16i8, __VA_ARGS__)
#define SLDI_B4_0_SH(...) SLDI_B4_0(v8i16, __VA_ARGS__)
/* Description : Immediate number of columns to slide
Arguments : Inputs - in0_0, in0_1, in1_0, in1_1, slide_val
@ -1037,6 +1039,21 @@
out_m; \
} )
/* Description : Horizontal addition of unsigned byte vector elements
   Arguments   : Inputs  - in0, in1
                 Outputs - out0, out1
                 Return Type - as per RTYPE
   Details     : Each unsigned odd byte element from 'in0' is added to
                 even unsigned byte element from 'in0' (pairwise) and the
                 halfword result is stored in 'out0'.
                 'in1' is processed the same way and stored in 'out1'.
   Notes       : The inputs are parenthesized before the cast so that an
                 expression argument is converted as a whole.
                 Each input is expanded twice; do not pass arguments with
                 side effects.
*/
#define HADD_UB2(RTYPE, in0, in1, out0, out1)                     \
{                                                                 \
    out0 = (RTYPE) __msa_hadd_u_h((v16u8) (in0), (v16u8) (in0));  \
    out1 = (RTYPE) __msa_hadd_u_h((v16u8) (in1), (v16u8) (in1));  \
}
#define HADD_UB2_UH(...) HADD_UB2(v8u16, __VA_ARGS__)
/* Description : Horizontal subtraction of unsigned byte vector elements
Arguments : Inputs - in0, in1
Outputs - out0, out1
@ -1053,6 +1070,20 @@
#define HSUB_UB2_UH(...) HSUB_UB2(v8u16, __VA_ARGS__)
#define HSUB_UB2_SH(...) HSUB_UB2(v8i16, __VA_ARGS__)
/* Description : Insert specified word elements from input values to 1
                 destination vector
   Arguments   : Inputs  - in0, in1 (INSERT_W2) or
                           in0, in1, in2, in3 (INSERT_W4)
                           These are the word values to insert, not vectors.
                 Outputs - out (output vector)
                 Return Type - as per RTYPE
   Details     : INSERT_W2 inserts 'in0' at word index 0 and 'in1' at word
                 index 1 of 'out'. 'out' is both read and written by each
                 insert, so it must be an assignable vector variable.
*/
#define INSERT_W2(RTYPE, in0, in1, out) \
{ \
out = (RTYPE) __msa_insert_w((v4i32) out, 0, in0); \
out = (RTYPE) __msa_insert_w((v4i32) out, 1, in1); \
}
#define INSERT_W2_UB(...) INSERT_W2(v16u8, __VA_ARGS__)
#define INSERT_W2_SB(...) INSERT_W2(v16i8, __VA_ARGS__)
#define INSERT_W4(RTYPE, in0, in1, in2, in3, out) \
{ \
out = (RTYPE) __msa_insert_w((v4i32) out, 0, in0); \
@ -1364,8 +1395,11 @@
out0 = (RTYPE) __msa_ilvr_b((v16i8) in0, (v16i8) in1); \
out1 = (RTYPE) __msa_ilvl_b((v16i8) in0, (v16i8) in1); \
}
#define ILVRL_B2_UB(...) ILVRL_B2(v16u8, __VA_ARGS__)
#define ILVRL_B2_SB(...) ILVRL_B2(v16i8, __VA_ARGS__)
#define ILVRL_B2_UH(...) ILVRL_B2(v8u16, __VA_ARGS__)
#define ILVRL_B2_SH(...) ILVRL_B2(v8i16, __VA_ARGS__)
#define ILVRL_B2_SW(...) ILVRL_B2(v4i32, __VA_ARGS__)
#define ILVRL_H2(RTYPE, in0, in1, out0, out1) \
{ \
@ -1923,6 +1957,18 @@
ADD2(in4, in5, in6, in7, out2, out3); \
}
/* Description : Subtraction of 2 pairs of vectors
   Arguments   : Inputs  - in0, in1, in2, in3
                 Outputs - out0, out1
   Details     : 'in1' is subtracted from 'in0' and stored in 'out0';
                 'in3' is subtracted from 'in2' and stored in 'out1'.
   Notes       : Every input is parenthesized so that an expression
                 argument (e.g. 'a + b') is subtracted as a whole instead
                 of being split by operator precedence.
*/
#define SUB2(in0, in1, in2, in3, out0, out1)  \
{                                             \
    out0 = (in0) - (in1);                     \
    out1 = (in2) - (in3);                     \
}
/* Description : Sign extend byte elements from input vector and return
halfword results in pair of vectors
Arguments : Inputs - in (1 input byte vector)

Loading…
Cancel
Save