diff --git a/libavcodec/flacdsp.c b/libavcodec/flacdsp.c
index 3f4f71076c..b916869321 100644
--- a/libavcodec/flacdsp.c
+++ b/libavcodec/flacdsp.c
@@ -26,6 +26,7 @@
 #define SAMPLE_SIZE 16
 #define PLANAR 0
 #include "flacdsp_template.c"
+#include "flacdsp_lpc_template.c"
 
 #undef PLANAR
 #define PLANAR 1
@@ -36,6 +37,7 @@
 #define SAMPLE_SIZE 32
 #define PLANAR 0
 #include "flacdsp_template.c"
+#include "flacdsp_lpc_template.c"
 
 #undef PLANAR
 #define PLANAR 1
@@ -86,10 +88,13 @@ static void flac_lpc_32_c(int32_t *decoded, const int coeffs[32],
 av_cold void ff_flacdsp_init(FLACDSPContext *c, enum AVSampleFormat fmt,
                              int bps)
 {
-    if (bps > 16)
+    if (bps > 16) {
         c->lpc = flac_lpc_32_c;
-    else
+        c->lpc_encode = flac_lpc_encode_c_32;
+    } else {
         c->lpc = flac_lpc_16_c;
+        c->lpc_encode = flac_lpc_encode_c_16;
+    }
 
     switch (fmt) {
     case AV_SAMPLE_FMT_S32:
diff --git a/libavcodec/flacdsp.h b/libavcodec/flacdsp.h
index 429daab346..33184b589d 100644
--- a/libavcodec/flacdsp.h
+++ b/libavcodec/flacdsp.h
@@ -27,6 +27,8 @@ typedef struct FLACDSPContext {
                            int len, int shift);
     void (*lpc)(int32_t *samples, const int coeffs[32], int order,
                 int qlevel, int len);
+    void (*lpc_encode)(int32_t *res, const int32_t *smp, int len, int order,
+                       const int32_t *coefs, int shift);
 } FLACDSPContext;
 
 void ff_flacdsp_init(FLACDSPContext *c, enum AVSampleFormat fmt, int bps);
diff --git a/libavcodec/flacdsp_lpc_template.c b/libavcodec/flacdsp_lpc_template.c
new file mode 100644
index 0000000000..269e64b3a8
--- /dev/null
+++ b/libavcodec/flacdsp_lpc_template.c
@@ -0,0 +1,174 @@
+/*
+ * This file is part of Libav.
+ *
+ * Libav is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * Libav is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with Libav; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+ */
+
+#include <stdint.h>
+#include "libavutil/avutil.h"
+#include "mathops.h"
+
+#undef FUNC
+#undef sum_type
+#undef MUL
+#undef CLIP
+#undef FSUF
+
+/* Append the sample size to a name, e.g. flac_lpc_encode_c_16 / _32. */
+#define FUNC(n) AV_JOIN(n ## _, SAMPLE_SIZE)
+
+/*
+ * sum_type is the accumulator for the prediction sums, MUL forms one
+ * coefficient * sample product and CLIP is applied to the shifted sum.
+ * With SAMPLE_SIZE 32 the sums are 64 bits wide and the result is passed
+ * through av_clipl_int32(); otherwise plain 32-bit arithmetic is used.
+ */
+#if SAMPLE_SIZE == 32
+#   define sum_type int64_t
+#   define MUL(a, b) MUL64(a, b)
+#   define CLIP(x) av_clipl_int32(x)
+#else
+#   define sum_type int32_t
+#   define MUL(a, b) ((a) * (b))
+#   define CLIP(x) (x)
+#endif
+
+/*
+ * One filter tap for two neighbouring outputs: add coefs[x-1] * s to p0,
+ * load s = smp[i-x+1], then add coefs[x-1] * s to p1.
+ * Relies on coefs, smp, i, s, p0 and p1 being in scope where it expands.
+ */
+#define LPC1(x) {           \
+    int c = coefs[(x)-1];   \
+    p0 += MUL(c, s);        \
+    s = smp[i-(x)+1];       \
+    p1 += MUL(c, s);        \
+}
+
+/*
+ * Write res[i] = smp[i] - CLIP(sum >> shift) for i = order..len-1, where
+ * sum adds coefs[k-1] * smp[i-k] over k = 1..order. Two outputs are
+ * produced per iteration and every tap relies on switch fall-through.
+ *
+ * big picks the switch: non-zero uses the ladder with case labels 9..32
+ * (taps 8..1 follow without labels), zero uses the one for orders 1..8.
+ *
+ * The loop advances by two, so when (len - order) is odd the last pass
+ * also reads smp[len] and writes res[len].
+ * NOTE(review): presumably both buffers have room for that - confirm.
+ */
+static av_always_inline void FUNC(lpc_encode_unrolled)(int32_t *res,
+                                  const int32_t *smp, int len, int order,
+                                  const int32_t *coefs, int shift, int big)
+{
+    int i;
+    for (i = order; i < len; i += 2) {
+        int s = smp[i-order];
+        sum_type p0 = 0, p1 = 0;
+        if (big) {
+            switch (order) {
+            case 32: LPC1(32)
+            case 31: LPC1(31)
+            case 30: LPC1(30)
+            case 29: LPC1(29)
+            case 28: LPC1(28)
+            case 27: LPC1(27)
+            case 26: LPC1(26)
+            case 25: LPC1(25)
+            case 24: LPC1(24)
+            case 23: LPC1(23)
+            case 22: LPC1(22)
+            case 21: LPC1(21)
+            case 20: LPC1(20)
+            case 19: LPC1(19)
+            case 18: LPC1(18)
+            case 17: LPC1(17)
+            case 16: LPC1(16)
+            case 15: LPC1(15)
+            case 14: LPC1(14)
+            case 13: LPC1(13)
+            case 12: LPC1(12)
+            case 11: LPC1(11)
+            case 10: LPC1(10)
+            case 9: LPC1( 9)
+                    LPC1( 8)
+                    LPC1( 7)
+                    LPC1( 6)
+                    LPC1( 5)
+                    LPC1( 4)
+                    LPC1( 3)
+                    LPC1( 2)
+                    LPC1( 1)
+            }
+        } else {
+            switch (order) {
+            case 8: LPC1( 8)
+            case 7: LPC1( 7)
+            case 6: LPC1( 6)
+            case 5: LPC1( 5)
+            case 4: LPC1( 4)
+            case 3: LPC1( 3)
+            case 2: LPC1( 2)
+            case 1: LPC1( 1)
+            }
+        }
+        res[i ] = smp[i ] - CLIP(p0 >> shift);
+        res[i+1] = smp[i+1] - CLIP(p1 >> shift);
+    }
+}
+
+/*
+ * C implementation that ff_flacdsp_init() stores in
+ * FLACDSPContext.lpc_encode.
+ *
+ * The first order samples are copied to res unchanged; every later
+ * position gets the prediction residual. With CONFIG_SMALL a generic
+ * two-outputs-per-iteration loop is used; otherwise orders 1..8 call the
+ * unrolled helper with a literal order and any other order uses big = 1.
+ */
+static void FUNC(flac_lpc_encode_c)(int32_t *res, const int32_t *smp, int len,
+                                    int order, const int32_t *coefs, int shift)
+{
+    int i;
+    for (i = 0; i < order; i++)
+        res[i] = smp[i];
+#if CONFIG_SMALL
+    for (i = order; i < len; i += 2) {
+        int j;
+        int s = smp[i];
+        sum_type p0 = 0, p1 = 0;
+        for (j = 0; j < order; j++) {
+            int c = coefs[j];
+            p1 += MUL(c, s);
+            s = smp[i-j-1];
+            p0 += MUL(c, s);
+        }
+        res[i ] = smp[i ] - CLIP(p0 >> shift);
+        res[i+1] = smp[i+1] - CLIP(p1 >> shift);
+    }
+#else
+    switch (order) {
+    case 1: FUNC(lpc_encode_unrolled)(res, smp, len, 1, coefs, shift, 0); break;
+    case 2: FUNC(lpc_encode_unrolled)(res, smp, len, 2, coefs, shift, 0); break;
+    case 3: FUNC(lpc_encode_unrolled)(res, smp, len, 3, coefs, shift, 0); break;
+    case 4: FUNC(lpc_encode_unrolled)(res, smp, len, 4, coefs, shift, 0); break;
+    case 5: FUNC(lpc_encode_unrolled)(res, smp, len, 5, coefs, shift, 0); break;
+    case 6: FUNC(lpc_encode_unrolled)(res, smp, len, 6, coefs, shift, 0); break;
+    case 7: FUNC(lpc_encode_unrolled)(res, smp, len, 7, coefs, shift, 0); break;
+    case 8: FUNC(lpc_encode_unrolled)(res, smp, len, 8, coefs, shift, 0); break;
+    default: FUNC(lpc_encode_unrolled)(res, smp, len, order, coefs, shift, 1); break;
+    }
+#endif
+}
diff --git a/libavcodec/flacenc.c b/libavcodec/flacenc.c
index 115664a779..71024f272e 100644
--- a/libavcodec/flacenc.c
+++ b/libavcodec/flacenc.c
@@ -31,6 +31,7 @@
 #include "lpc.h"
 #include "flac.h"
 #include "flacdata.h"
+#include "flacdsp.h"
 
 #define FLAC_SUBFRAME_CONSTANT 0
 #define FLAC_SUBFRAME_VERBATIM 1
@@ -106,6 +107,7 @@ typedef struct FlacEncodeContext {
     uint8_t *md5_buffer;
     unsigned int md5_buffer_size;
     DSPContext dsp;
+    FLACDSPContext flac_dsp;
 } FlacEncodeContext;
 
 
@@ -385,6 +387,7 @@ static av_cold int flac_encode_init(AVCodecContext *avctx)
                       s->options.max_prediction_order,
FF_LPC_TYPE_LEVINSON); ff_dsputil_init(&s->dsp, avctx); + ff_flacdsp_init(&s->flac_dsp, avctx->sample_fmt, 16); dprint_compression_options(s); @@ -684,110 +687,6 @@ static void encode_residual_fixed(int32_t *res, const int32_t *smp, int n, } -#define LPC1(x) {\ - int c = coefs[(x)-1];\ - p0 += c * s;\ - s = smp[i-(x)+1];\ - p1 += c * s;\ -} - -static av_always_inline void encode_residual_lpc_unrolled(int32_t *res, - const int32_t *smp, int n, int order, - const int32_t *coefs, int shift, int big) -{ - int i; - for (i = order; i < n; i += 2) { - int s = smp[i-order]; - int p0 = 0, p1 = 0; - if (big) { - switch (order) { - case 32: LPC1(32) - case 31: LPC1(31) - case 30: LPC1(30) - case 29: LPC1(29) - case 28: LPC1(28) - case 27: LPC1(27) - case 26: LPC1(26) - case 25: LPC1(25) - case 24: LPC1(24) - case 23: LPC1(23) - case 22: LPC1(22) - case 21: LPC1(21) - case 20: LPC1(20) - case 19: LPC1(19) - case 18: LPC1(18) - case 17: LPC1(17) - case 16: LPC1(16) - case 15: LPC1(15) - case 14: LPC1(14) - case 13: LPC1(13) - case 12: LPC1(12) - case 11: LPC1(11) - case 10: LPC1(10) - case 9: LPC1( 9) - LPC1( 8) - LPC1( 7) - LPC1( 6) - LPC1( 5) - LPC1( 4) - LPC1( 3) - LPC1( 2) - LPC1( 1) - } - } else { - switch (order) { - case 8: LPC1( 8) - case 7: LPC1( 7) - case 6: LPC1( 6) - case 5: LPC1( 5) - case 4: LPC1( 4) - case 3: LPC1( 3) - case 2: LPC1( 2) - case 1: LPC1( 1) - } - } - res[i ] = smp[i ] - (p0 >> shift); - res[i+1] = smp[i+1] - (p1 >> shift); - } -} - - -static void encode_residual_lpc(int32_t *res, const int32_t *smp, int n, - int order, const int32_t *coefs, int shift) -{ - int i; - for (i = 0; i < order; i++) - res[i] = smp[i]; -#if CONFIG_SMALL - for (i = order; i < n; i += 2) { - int j; - int s = smp[i]; - int p0 = 0, p1 = 0; - for (j = 0; j < order; j++) { - int c = coefs[j]; - p1 += c * s; - s = smp[i-j-1]; - p0 += c * s; - } - res[i ] = smp[i ] - (p0 >> shift); - res[i+1] = smp[i+1] - (p1 >> shift); - } -#else - switch (order) { - case 1: 
encode_residual_lpc_unrolled(res, smp, n, 1, coefs, shift, 0); break; - case 2: encode_residual_lpc_unrolled(res, smp, n, 2, coefs, shift, 0); break; - case 3: encode_residual_lpc_unrolled(res, smp, n, 3, coefs, shift, 0); break; - case 4: encode_residual_lpc_unrolled(res, smp, n, 4, coefs, shift, 0); break; - case 5: encode_residual_lpc_unrolled(res, smp, n, 5, coefs, shift, 0); break; - case 6: encode_residual_lpc_unrolled(res, smp, n, 6, coefs, shift, 0); break; - case 7: encode_residual_lpc_unrolled(res, smp, n, 7, coefs, shift, 0); break; - case 8: encode_residual_lpc_unrolled(res, smp, n, 8, coefs, shift, 0); break; - default: encode_residual_lpc_unrolled(res, smp, n, order, coefs, shift, 1); break; - } -#endif -} - - static int encode_residual_ch(FlacEncodeContext *s, int ch) { int i, n; @@ -869,7 +768,8 @@ static int encode_residual_ch(FlacEncodeContext *s, int ch) order = min_order + (((max_order-min_order+1) * (i+1)) / levels)-1; if (order < 0) order = 0; - encode_residual_lpc(res, smp, n, order+1, coefs[order], shift[order]); + s->flac_dsp.lpc_encode(res, smp, n, order+1, coefs[order], + shift[order]); bits[i] = find_subframe_rice_params(s, sub, order+1); if (bits[i] < bits[opt_index]) { opt_index = i; @@ -883,7 +783,7 @@ static int encode_residual_ch(FlacEncodeContext *s, int ch) opt_order = 0; bits[0] = UINT32_MAX; for (i = min_order-1; i < max_order; i++) { - encode_residual_lpc(res, smp, n, i+1, coefs[i], shift[i]); + s->flac_dsp.lpc_encode(res, smp, n, i+1, coefs[i], shift[i]); bits[i] = find_subframe_rice_params(s, sub, i+1); if (bits[i] < bits[opt_order]) opt_order = i; @@ -901,7 +801,7 @@ static int encode_residual_ch(FlacEncodeContext *s, int ch) for (i = last-step; i <= last+step; i += step) { if (i < min_order-1 || i >= max_order || bits[i] < UINT32_MAX) continue; - encode_residual_lpc(res, smp, n, i+1, coefs[i], shift[i]); + s->flac_dsp.lpc_encode(res, smp, n, i+1, coefs[i], shift[i]); bits[i] = find_subframe_rice_params(s, sub, i+1); if 
(bits[i] < bits[opt_order]) opt_order = i; @@ -916,7 +816,7 @@ static int encode_residual_ch(FlacEncodeContext *s, int ch) for (i = 0; i < sub->order; i++) sub->coefs[i] = coefs[sub->order-1][i]; - encode_residual_lpc(res, smp, n, sub->order, sub->coefs, sub->shift); + s->flac_dsp.lpc_encode(res, smp, n, sub->order, sub->coefs, sub->shift); find_subframe_rice_params(s, sub, sub->order);