Add G.729D decoder

17 years ago · e610c5f383
parent b7c7fc3359
commit e610c5f383
2 changed files with 137 additions and 1 deletions
--- a/libavcodec/g729data.h
+++ b/libavcodec/g729data.h
@ -351,4 +351,32 @@ static const uint16_t ma_prediction_coeff[4] = { /* (0.13) */
 static const int16_t lsp_init[10]= { /* (0.15) */
   30000, 26000, 21000, 15000, 8000, 0, -8000,-15000,-21000,-26000
 };
+
+/**
+ * additional "phase" post-processing filter impulse response (D.6.2 of G.729)
+ *
+ * Table contains three impulse responses, correspond to
+ * different amounts of spreading.
+ */
+static const int16_t phase_filter[3][40] =
+{
+  { // maximum spreading (for noise-like segments)
+    14690, 11518,  1268, -2762, -5672,  7514,  -36, -2808, -3041,  4823,
+     2952, -8425,  3785,  1455,  2179, -8638, 8051, -2104, -1455,   777,
+     1108, -2386,  2254,  -364,  -675, -2104, 6046, -5682,  1072,  3123,
+    -5059,  5312, -2330, -3729,  6924, -3890,  675, -1776,    29, 10145,
+  },
+  { // medium spreading
+    30274,  3831, -4037,  2972, -1049, -1003,  2477, -3044,  2815, -2232,
+     1753, -1612,  1714, -1776,  1543, -1009,   429,  -170,   472, -1265,
+     2176, -2707,  2523, -1622,   344,   826, -1530,  1724, -1658,  1701,
+    -2064,  2644, -3061,  2897, -1979,   557,   780, -1370,   842,   655,
+  },
+  { // no spreading (for voiced speech)
+    32767, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+        0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+        0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+        0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+  }
+};
 #endif /* AVCODEC_G729DATA_H */
--- a/libavcodec/g729dec.c
+++ b/libavcodec/g729dec.c
@ -81,6 +81,10 @@
 */
 #define MR_ENERGY 1018156

+#define DECISION_NOISE        0
+#define DECISION_INTERMEDIATE 1
+#define DECISION_VOICE        2
+
 typedef enum {
    FORMAT_G729_8K = 0,
    FORMAT_G729D_6K4,
@ -124,6 +128,10 @@ typedef struct {
    /// (14.1) gain code from current and previous subframe
    int16_t past_gain_code[2];

+    /// voice decision on previous subframe (0-noise, 1-intermediate, 2-voice), G.729D
+    int16_t voice_decision;
+
+    int16_t onset;              ///< detected onset level (0-2)
    int16_t was_periodic;       ///< whether previous frame was declared as periodic or not (4.4)
    uint16_t rand_value;        ///< random number generator value (4.4.4)
    int ma_predictor_prev;      ///< switched MA predictor of LSP quantizer from last good frame
@ -230,6 +238,85 @@ static void lsf_restore_from_previous(int16_t* lsfq,
    }
 }

+/**
+ * Constructs new excitation signal and applies phase filter to it
+ * @param out[out] constructed speech signal
+ * @param in original excitation signal
+ * @param fc_cur (2.13) original fixed-codebook vector
+ * @param gain_code (14.1) gain code
+ * @param subframe_size length of the subframe
+ */
+void g729d_get_new_exc(
+        int16_t* out,
+        const int16_t* in,
+        const int16_t* fc_cur,
+        int dstate,
+        int gain_code,
+        int subframe_size)
+{
+    int i;
+    int16_t fc_new[SUBFRAME_SIZE];
+
+    ff_celp_convolve_circ(fc_new, fc_cur, phase_filter[dstate], subframe_size);
+
+    for(i=0; i<subframe_size; i++)
+    {
+        out[i]  = in[i];
+        out[i] -= (gain_code * fc_cur[i] + 0x2000) >> 14;
+        out[i] += (gain_code * fc_new[i] + 0x2000) >> 14;
+    }
+}
+
+/**
+ * Makes decision about onset in current subframe
+ * @param past_onset decision result of previous subframe
+ * @param past_gain_code gain code of current and previous subframe
+ *
+ * @return onset decision result for current subframe
+ */
+int g729d_onset_decision(int past_onset, const int16_t* past_gain_code)
+{
+    if((past_gain_code[0] >> 1) > past_gain_code[1])
+        return 2;
+    else
+        return FFMAX(past_onset-1, 0);
+}
+
+/**
+ * Makes decision about voice presence in current subframe
+ * @param onset onset level
+ * @param prev_voice_decision voice decision result from previous subframe
+ * @param past_gain_pitch pitch gain of current and previous subframes
+ *
+ * @return voice decision result for current subframe
+ */
+static int16_t g729d_voice_decision(int onset, int prev_voice_decision, const int16_t* past_gain_pitch)
+{
+    int i, low_gain_pitch_cnt, voice_decision;
+
+    if(past_gain_pitch[0] >= 14745)      // 0.9
+        voice_decision = DECISION_VOICE;
+    else if (past_gain_pitch[0] <= 9830) // 0.6
+        voice_decision = DECISION_NOISE;
+    else
+        voice_decision = DECISION_INTERMEDIATE;
+
+    for(i=0, low_gain_pitch_cnt=0; i<6; i++)
+        if(past_gain_pitch[i] < 9830)
+            low_gain_pitch_cnt++;
+
+    if(low_gain_pitch_cnt > 2 && !onset)
+        voice_decision = DECISION_NOISE;
+
+    if(!onset && voice_decision > prev_voice_decision + 1)
+        voice_decision--;
+
+    if(onset && voice_decision < DECISION_VOICE)
+        voice_decision++;
+
+    return voice_decision;
+}
+
 static av_cold int decoder_init(AVCodecContext * avctx)
 {
    G729Context* ctx = avctx->priv_data;
@ -302,6 +389,9 @@ static int decode_frame(AVCodecContext *avctx, void *data, int *data_size,
    if (buf_size == 10) {
        packet_type = FORMAT_G729_8K;
        format = format_g729_8k;
+        //Reset voice decision
+        ctx->onset = 0;
+        ctx->voice_decision = DECISION_VOICE;
        av_log(avctx, AV_LOG_DEBUG, "Packet type: %s\n", "G.729 @ 8kbit/s");
    } else if (buf_size == 8) {
        packet_type = FORMAT_G729D_6K4;
@ -497,11 +587,29 @@ static int decode_frame(AVCodecContext *avctx, void *data, int *data_size,
            SUBFRAME_SIZE,
            10,
            1,
-            0x800)) {
+            0x800))
            /* Overflow occured, downscale excitation signal... */
            for (j = 0; j < 2 * SUBFRAME_SIZE + PITCH_DELAY_MAX + INTERPOL_LEN; j++)
                ctx->exc_base[j] >>= 2;

+        /* ... and make synthesis again. */
+        if (packet_type == FORMAT_G729D_6K4) {
+            int16_t exc_new[SUBFRAME_SIZE];
+
+            ctx->onset = g729d_onset_decision(ctx->onset, ctx->past_gain_code);
+            ctx->voice_decision = g729d_voice_decision(ctx->onset, ctx->voice_decision, ctx->past_gain_pitch);
+
+            g729d_get_new_exc(exc_new, ctx->exc  + i * SUBFRAME_SIZE, fc, ctx->voice_decision, ctx->past_gain_code[0], SUBFRAME_SIZE);
+
+            ff_celp_lp_synthesis_filter(
+                    synth+10,
+                    &lp[i][1],
+                    exc_new,
+                    SUBFRAME_SIZE,
+                    10,
+                    0,
+                    0x800);
+        } else {
            ff_celp_lp_synthesis_filter(
                    synth+10,
                    &lp[i][1],