FFmpeg/libavutil/lls.c

/*
 * linear least squares model
 *
 * Copyright (c) 2006 Michael Niedermayer <michaelni@gmx.at>
 *
 * This file is part of FFmpeg.
 *
 * FFmpeg is free software; you can redistribute it and/or
 * modify it under the terms of the GNU Lesser General Public
 * License as published by the Free Software Foundation; either
 * version 2.1 of the License, or (at your option) any later version.
 *
 * FFmpeg is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
 * Lesser General Public License for more details.
 *
 * You should have received a copy of the GNU Lesser General Public
 * License along with FFmpeg; if not, write to the Free Software
 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
 */

/**
 * @file
 * linear least squares model
 */

#include <math.h>
#include <string.h>

#include "attributes.h"
#include "internal.h"
#include "lls.h"

/**
 * Add one sample to the running sums of the model.
 *
 * For every pair (row, col) with row <= col <= m->indep_count the product
 * var[row] * var[col] is added to m->covariance[row][col]; entries with
 * col < row are left untouched.
 *
 * @param m   model whose covariance sums are updated in place
 * @param var sample values; indices 0..m->indep_count are read
 */
static void update_lls(LLSModel *m, const double *var)
{
    const int last = m->indep_count;
    int row, col;

    for (row = 0; row <= last; row++) {
        double *acc = m->covariance[row];

        for (col = row; col <= last; col++)
            acc[col] += var[row] * var[col];
    }
}

/**
 * Solve the least-squares system from the sums accumulated in m->covariance.
 *
 * Layout used here (update_lls in this file only fills entries [r][c] with
 * c >= r, i.e. the upper triangle including the diagonal):
 *  - covar_y = row 0 of m->covariance,
 *  - covar   = view starting at m->covariance[1][1]; only covar[a][b] with
 *              b >= a is read, which is the upper triangle,
 *  - factor  = view starting at m->covariance[1][0]; factor[j][i] with
 *              i <= j is m->covariance[j + 1][i], which lies strictly below
 *              the diagonal, so writing the factor does not overwrite the
 *              values read through covar.
 * NOTE(review): both views assume rows of m->covariance are MAX_VARS_ALIGN
 * doubles wide; the declaration is not visible in this file - confirm.
 *
 * @param m         model; m->covariance and m->indep_count are read,
 *                  m->covariance (below the diagonal), m->coeff and
 *                  m->variance are written
 * @param threshold diagonal pivots smaller than this are replaced by 1.0
 *                  before the square root is taken
 * @param min_order lowest order index j for which m->coeff[j] and
 *                  m->variance[j] are computed; orders run from
 *                  m->indep_count - 1 down to min_order
 */
void avpriv_solve_lls(LLSModel *m, double threshold, unsigned short min_order)
{
    int i, j, k;
    double (*factor)[MAX_VARS_ALIGN] = (void *) &m->covariance[1][0];
    double (*covar) [MAX_VARS_ALIGN] = (void *) &m->covariance[1][1];
    double *covar_y                = m->covariance[0];
    int count                      = m->indep_count;

    /* Cholesky-style factorization of covar into the lower-triangular
     * factor, one column i at a time. */
    for (i = 0; i < count; i++) {
        for (j = i; j < count; j++) {
            double sum = covar[i][j];

            /* subtract the contribution of the already computed columns */
            for (k = 0; k <= i-1; k++)
                sum -= factor[i][k] * factor[j][k];

            if (i == j) {
                /* pivot below threshold: substitute 1.0 so the sqrt and the
                 * later divisions by factor[i][i] use 1.0 instead */
                if (sum < threshold)
                    sum = 1.0;
                factor[i][i] = sqrt(sum);
            } else {
                factor[j][i] = sum / factor[i][i];
            }
        }
    }

    /* Forward substitution with the lower-triangular factor; the
     * intermediate solution is kept in m->coeff[0]. */
    for (i = 0; i < count; i++) {
        double sum = covar_y[i + 1];

        for (k = 0; k <= i-1; k++)
            sum -= factor[i][k] * m->coeff[0][k];

        m->coeff[0][i] = sum / factor[i][i];
    }

    /* For each order j (highest first) run a back substitution restricted to
     * indices 0..j. m->coeff[0] is the right-hand side for every order, so it
     * may only be overwritten by the j == 0 pass, which is the last pass
     * because j counts down. */
    for (j = count - 1; j >= min_order; j--) {
        for (i = j; i >= 0; i--) {
            double sum = m->coeff[0][i];

            for (k = i + 1; k <= j; k++)
                sum -= factor[k][i] * m->coeff[j][k];

            m->coeff[j][i] = sum / factor[i][i];
        }

        /* variance[j] = covar_y[0] - 2 * sum_i c[i] * covar_y[i + 1]
         *             + sum_i sum_k c[i] * c[k] * covar[i][k]  with c = coeff[j];
         * the symmetric double sum is evaluated from the upper triangle only
         * (off-diagonal terms counted twice).
         * NOTE(review): this looks like the unnormalized residual of the
         * order-j fit - confirm against the callers. */
        m->variance[j] = covar_y[0];

        for (i = 0; i <= j; i++) {
            double sum = m->coeff[j][i] * covar[i][i] - 2 * covar_y[i + 1];

            for (k = 0; k < i; k++)
                sum += 2 * m->coeff[j][k] * covar[k][i];

            m->variance[j] += m->coeff[j][i] * sum;
        }
    }
}

/**
 * Evaluate the model of a given order for one input vector.
 *
 * @param m     model whose m->coeff[order] row is read
 * @param param input values; indices 0..order are read
 * @param order index of the coefficient row and of the last term summed
 * @return sum over i = 0..order of param[i] * m->coeff[order][i];
 *         0 when order is negative
 */
static double evaluate_lls(LLSModel *m, const double *param, int order)
{
    double acc = 0;
    int idx    = 0;

    while (idx <= order) {
        acc += param[idx] * m->coeff[order][idx];
        idx++;
    }

    return acc;
}

/**
 * Reset a model and install the default callbacks.
 *
 * The whole structure is zeroed first, then the number of independent
 * variables and the C implementations of update_lls/evaluate_lls are stored.
 * When ARCH_X86 is non-zero, ff_init_lls_x86() is called afterwards with the
 * initialized model.
 *
 * @param m           model to initialize; any previous contents are discarded
 * @param indep_count value stored in m->indep_count
 */
av_cold void avpriv_init_lls(LLSModel *m, int indep_count)
{
    memset(m, 0, sizeof(*m));

    m->evaluate_lls = evaluate_lls;
    m->update_lls   = update_lls;
    m->indep_count  = indep_count;

    if (ARCH_X86) {
        ff_init_lls_x86(m);
    }
}
linear least squares solver using cholesky factorization Originally committed as revision 5740 to svn://svn.ffmpeg.org/ffmpeg/trunk 19 years ago			`/*`
			`* linear least squares model`
			`*`
			`* Copyright (c) 2006 Michael Niedermayer <michaelni@gmx.at>`
			`*`
Change license headers to say 'FFmpeg' instead of 'this program/this library' and fix GPL/LGPL version mismatches. Originally committed as revision 6577 to svn://svn.ffmpeg.org/ffmpeg/trunk 18 years ago			`* This file is part of FFmpeg.`
			`*`
			`* FFmpeg is free software; you can redistribute it and/or`
linear least squares solver using cholesky factorization Originally committed as revision 5740 to svn://svn.ffmpeg.org/ffmpeg/trunk 19 years ago			`* modify it under the terms of the GNU Lesser General Public`
			`* License as published by the Free Software Foundation; either`
Change license headers to say 'FFmpeg' instead of 'this program/this library' and fix GPL/LGPL version mismatches. Originally committed as revision 6577 to svn://svn.ffmpeg.org/ffmpeg/trunk 18 years ago			`* version 2.1 of the License, or (at your option) any later version.`
linear least squares solver using cholesky factorization Originally committed as revision 5740 to svn://svn.ffmpeg.org/ffmpeg/trunk 19 years ago			`*`
Change license headers to say 'FFmpeg' instead of 'this program/this library' and fix GPL/LGPL version mismatches. Originally committed as revision 6577 to svn://svn.ffmpeg.org/ffmpeg/trunk 18 years ago			`* FFmpeg is distributed in the hope that it will be useful,`
linear least squares solver using cholesky factorization Originally committed as revision 5740 to svn://svn.ffmpeg.org/ffmpeg/trunk 19 years ago			`* but WITHOUT ANY WARRANTY; without even the implied warranty of`
			`* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU`
			`* Lesser General Public License for more details.`
			`*`
			`* You should have received a copy of the GNU Lesser General Public`
Change license headers to say 'FFmpeg' instead of 'this program/this library' and fix GPL/LGPL version mismatches. Originally committed as revision 6577 to svn://svn.ffmpeg.org/ffmpeg/trunk 18 years ago			`* License along with FFmpeg; if not, write to the Free Software`
license header consistency cosmetics Originally committed as revision 9484 to svn://svn.ffmpeg.org/ffmpeg/trunk 18 years ago			`* Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA`
linear least squares solver using cholesky factorization Originally committed as revision 5740 to svn://svn.ffmpeg.org/ffmpeg/trunk 19 years ago			`*/`

			`/**`
Remove explicit filename from Doxygen @file commands. Passing an explicit filename to this command is only necessary if the documentation in the @file block refers to a file different from the one the block resides in. Originally committed as revision 22921 to svn://svn.ffmpeg.org/ffmpeg/trunk 15 years ago			`* @file`
linear least squares solver using cholesky factorization Originally committed as revision 5740 to svn://svn.ffmpeg.org/ffmpeg/trunk 19 years ago			`* linear least squares model`
			`*/`

			`#include <math.h>`
			`#include <string.h>`

avutil: Add av_cold attributes to init functions missing them 12 years ago			`#include "attributes.h"`
libavutil: move FFALIGN macro from common.h to macros.h Include macros.h explicitly in common.h so that external code using FFALIGN does not break. It was already implicitly included through version.h. Include macros.h in lls.h and internal.h for FFALIGN. lls.h was including common.h only for FFALIGN and internal.h was missing the include for FFALIGN. `make checkheaders` did not catch it because it's an internal header. 9 years ago			`#include "internal.h"`
drop LLS1, rename LLS2 to LLS Signed-off-by: Michael Niedermayer <michaelni@gmx.at> 11 years ago			`#include "lls.h"`
linear least squares solver using cholesky factorization Originally committed as revision 5740 to svn://svn.ffmpeg.org/ffmpeg/trunk 19 years ago
avutil/lls: Make unchanged function arguments const Reviewed-by: Paul B Mahol <onemda@gmail.com> Signed-off-by: Michael Niedermayer <michaelni@gmx.at> 10 years ago			`static void update_lls(LLSModel *m, const double *var)`
lls: whitespace cosmetics Signed-off-by: Mans Rullgard <mans@mansr.com> 14 years ago			`{`
			`int i, j;`
linear least squares solver using cholesky factorization Originally committed as revision 5740 to svn://svn.ffmpeg.org/ffmpeg/trunk 19 years ago
lls: whitespace cosmetics Signed-off-by: Mans Rullgard <mans@mansr.com> 14 years ago			`for (i = 0; i <= m->indep_count; i++) {`
			`for (j = i; j <= m->indep_count; j++) {`
			`m->covariance[i][j] += var[i] * var[j];`
linear least squares solver using cholesky factorization Originally committed as revision 5740 to svn://svn.ffmpeg.org/ffmpeg/trunk 19 years ago			`}`
			`}`
			`}`

drop LLS1, rename LLS2 to LLS Signed-off-by: Michael Niedermayer <michaelni@gmx.at> 11 years ago			`void avpriv_solve_lls(LLSModel *m, double threshold, unsigned short min_order)`
lls: whitespace cosmetics Signed-off-by: Mans Rullgard <mans@mansr.com> 14 years ago			`{`
			`int i, j, k;`
x86: lpc: simd av_update_lls 4x-6x faster on sandybridge Signed-off-by: Luca Barbato <lu_zero@gentoo.org> 12 years ago			`double (*factor)[MAX_VARS_ALIGN] = (void *) &m->covariance[1][0];`
			`double (*covar) [MAX_VARS_ALIGN] = (void *) &m->covariance[1][1];`
lls: whitespace cosmetics Signed-off-by: Mans Rullgard <mans@mansr.com> 14 years ago			`double *covar_y = m->covariance[0];`
			`int count = m->indep_count;`

			`for (i = 0; i < count; i++) {`
			`for (j = i; j < count; j++) {`
			`double sum = covar[i][j];`

avutil/lls: speed up performance of solve_lls This is a trivial rewrite of the loops that results in better prefetching and associated cache efficiency. Essentially, the problem is that modern prefetching logic is based on finite state Markov memory, a reasonable assumption that is used elsewhere in CPU's in for instance branch predictors. Surrounding loops all iterate forward through the array, making the predictor think of prefetching in the forward direction, but the intermediate loop is unnecessarily in the backward direction. Speedup is nontrivial. Benchmarks obtained by 10^6 iterations within solve_lls, with START/STOP_TIMER. File is tests/data/fate/flac-16-lpc-cholesky.err. Hardware: x86-64, Haswell, GNU/Linux. new: 17291 decicycles in solve_lls, 2096706 runs, 446 skips 17255 decicycles in solve_lls, 4193657 runs, 647 skips 17231 decicycles in solve_lls, 8384997 runs, 3611 skips 17189 decicycles in solve_lls,16771010 runs, 6206 skips 17132 decicycles in solve_lls,33544757 runs, 9675 skips 17092 decicycles in solve_lls,67092404 runs, 16460 skips 17058 decicycles in solve_lls,134188213 runs, 29515 skips old: 18009 decicycles in solve_lls, 2096665 runs, 487 skips 17805 decicycles in solve_lls, 4193320 runs, 984 skips 17779 decicycles in solve_lls, 8386855 runs, 1753 skips 18289 decicycles in solve_lls,16774280 runs, 2936 skips 18158 decicycles in solve_lls,33548104 runs, 6328 skips 18420 decicycles in solve_lls,67091793 runs, 17071 skips 18310 decicycles in solve_lls,134187219 runs, 30509 skips Reviewed-by: Michael Niedermayer <michael@niedermayer.cc> Signed-off-by: Ganesh Ajjanagadde <gajjanagadde@gmail.com> 9 years ago			`for (k = 0; k <= i-1; k++)`
lls: whitespace cosmetics Signed-off-by: Mans Rullgard <mans@mansr.com> 14 years ago			`sum -= factor[i][k] * factor[j][k];`

			`if (i == j) {`
			`if (sum < threshold)`
			`sum = 1.0;`
			`factor[i][i] = sqrt(sum);`
			`} else {`
			`factor[j][i] = sum / factor[i][i];`
			`}`
linear least squares solver using cholesky factorization Originally committed as revision 5740 to svn://svn.ffmpeg.org/ffmpeg/trunk 19 years ago			`}`
			`}`
lls: whitespace cosmetics Signed-off-by: Mans Rullgard <mans@mansr.com> 14 years ago
			`for (i = 0; i < count; i++) {`
			`double sum = covar_y[i + 1];`

avutil/lls: speed up performance of solve_lls This is a trivial rewrite of the loops that results in better prefetching and associated cache efficiency. Essentially, the problem is that modern prefetching logic is based on finite state Markov memory, a reasonable assumption that is used elsewhere in CPU's in for instance branch predictors. Surrounding loops all iterate forward through the array, making the predictor think of prefetching in the forward direction, but the intermediate loop is unnecessarily in the backward direction. Speedup is nontrivial. Benchmarks obtained by 10^6 iterations within solve_lls, with START/STOP_TIMER. File is tests/data/fate/flac-16-lpc-cholesky.err. Hardware: x86-64, Haswell, GNU/Linux. new: 17291 decicycles in solve_lls, 2096706 runs, 446 skips 17255 decicycles in solve_lls, 4193657 runs, 647 skips 17231 decicycles in solve_lls, 8384997 runs, 3611 skips 17189 decicycles in solve_lls,16771010 runs, 6206 skips 17132 decicycles in solve_lls,33544757 runs, 9675 skips 17092 decicycles in solve_lls,67092404 runs, 16460 skips 17058 decicycles in solve_lls,134188213 runs, 29515 skips old: 18009 decicycles in solve_lls, 2096665 runs, 487 skips 17805 decicycles in solve_lls, 4193320 runs, 984 skips 17779 decicycles in solve_lls, 8386855 runs, 1753 skips 18289 decicycles in solve_lls,16774280 runs, 2936 skips 18158 decicycles in solve_lls,33548104 runs, 6328 skips 18420 decicycles in solve_lls,67091793 runs, 17071 skips 18310 decicycles in solve_lls,134187219 runs, 30509 skips Reviewed-by: Michael Niedermayer <michael@niedermayer.cc> Signed-off-by: Ganesh Ajjanagadde <gajjanagadde@gmail.com> 9 years ago			`for (k = 0; k <= i-1; k++)`
lls: whitespace cosmetics Signed-off-by: Mans Rullgard <mans@mansr.com> 14 years ago			`sum -= factor[i][k] * m->coeff[0][k];`

			`m->coeff[0][i] = sum / factor[i][i];`
linear least squares solver using cholesky factorization Originally committed as revision 5740 to svn://svn.ffmpeg.org/ffmpeg/trunk 19 years ago			`}`

lls: whitespace cosmetics Signed-off-by: Mans Rullgard <mans@mansr.com> 14 years ago			`for (j = count - 1; j >= min_order; j--) {`
			`for (i = j; i >= 0; i--) {`
			`double sum = m->coeff[0][i];`

			`for (k = i + 1; k <= j; k++)`
			`sum -= factor[k][i] * m->coeff[j][k];`

			`m->coeff[j][i] = sum / factor[i][i];`
calculate all coefficients for several orders during cholesky factorization, the resulting coefficients are not strictly optimal though as there is a small difference in the autocorrelation matrixes which is ignored for the smaller orders Originally committed as revision 5758 to svn://svn.ffmpeg.org/ffmpeg/trunk 19 years ago			`}`
linear least squares solver using cholesky factorization Originally committed as revision 5740 to svn://svn.ffmpeg.org/ffmpeg/trunk 19 years ago
lls: whitespace cosmetics Signed-off-by: Mans Rullgard <mans@mansr.com> 14 years ago			`m->variance[j] = covar_y[0];`

			`for (i = 0; i <= j; i++) {`
			`double sum = m->coeff[j][i] * covar[i][i] - 2 * covar_y[i + 1];`

			`for (k = 0; k < i; k++)`
			`sum += 2 * m->coeff[j][k] * covar[k][i];`

			`m->variance[j] += m->coeff[j][i] * sum;`
calculate all coefficients for several orders during cholesky factorization, the resulting coefficients are not strictly optimal though as there is a small difference in the autocorrelation matrixes which is ignored for the smaller orders Originally committed as revision 5758 to svn://svn.ffmpeg.org/ffmpeg/trunk 19 years ago			`}`
linear least squares solver using cholesky factorization Originally committed as revision 5740 to svn://svn.ffmpeg.org/ffmpeg/trunk 19 years ago			`}`
			`}`

avutil/lls: Make unchanged function arguments const Reviewed-by: Paul B Mahol <onemda@gmail.com> Signed-off-by: Michael Niedermayer <michaelni@gmx.at> 10 years ago			`static double evaluate_lls(LLSModel *m, const double *param, int order)`
lls: whitespace cosmetics Signed-off-by: Mans Rullgard <mans@mansr.com> 14 years ago			`{`
linear least squares solver using cholesky factorization Originally committed as revision 5740 to svn://svn.ffmpeg.org/ffmpeg/trunk 19 years ago			`int i;`
lls: whitespace cosmetics Signed-off-by: Mans Rullgard <mans@mansr.com> 14 years ago			`double out = 0;`
linear least squares solver using cholesky factorization Originally committed as revision 5740 to svn://svn.ffmpeg.org/ffmpeg/trunk 19 years ago
lls: whitespace cosmetics Signed-off-by: Mans Rullgard <mans@mansr.com> 14 years ago			`for (i = 0; i <= order; i++)`
			`out += param[i] * m->coeff[order][i];`
linear least squares solver using cholesky factorization Originally committed as revision 5740 to svn://svn.ffmpeg.org/ffmpeg/trunk 19 years ago
			`return out;`
			`}`

drop LLS1, rename LLS2 to LLS Signed-off-by: Michael Niedermayer <michaelni@gmx.at> 11 years ago			`av_cold void avpriv_init_lls(LLSModel *m, int indep_count)`
lpc: use function pointers, in preparation for asm Signed-off-by: Luca Barbato <lu_zero@gentoo.org> 12 years ago			`{`
drop LLS1, rename LLS2 to LLS Signed-off-by: Michael Niedermayer <michaelni@gmx.at> 11 years ago			`memset(m, 0, sizeof(LLSModel));`
lpc: use function pointers, in preparation for asm Signed-off-by: Luca Barbato <lu_zero@gentoo.org> 12 years ago			`m->indep_count = indep_count;`
			`m->update_lls = update_lls;`
			`m->evaluate_lls = evaluate_lls;`
x86: lpc: simd av_update_lls 4x-6x faster on sandybridge Signed-off-by: Luca Barbato <lu_zero@gentoo.org> 12 years ago			`if (ARCH_X86)`
			`ff_init_lls_x86(m);`
lpc: use function pointers, in preparation for asm Signed-off-by: Luca Barbato <lu_zero@gentoo.org> 12 years ago			`}`