|
|
|
/*
|
|
|
|
* Copyright (c) 2005-2012 Michael Niedermayer <michaelni@gmx.at>
|
|
|
|
*
|
|
|
|
* This file is part of FFmpeg.
|
|
|
|
*
|
|
|
|
* FFmpeg is free software; you can redistribute it and/or
|
|
|
|
* modify it under the terms of the GNU Lesser General Public
|
|
|
|
* License as published by the Free Software Foundation; either
|
|
|
|
* version 2.1 of the License, or (at your option) any later version.
|
|
|
|
*
|
|
|
|
* FFmpeg is distributed in the hope that it will be useful,
|
|
|
|
* but WITHOUT ANY WARRANTY; without even the implied warranty of
|
|
|
|
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
|
|
|
|
* Lesser General Public License for more details.
|
|
|
|
*
|
|
|
|
* You should have received a copy of the GNU Lesser General Public
|
|
|
|
* License along with FFmpeg; if not, write to the Free Software
|
|
|
|
* Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
|
|
|
|
*/
|
|
|
|
|
|
|
|
/**
|
|
|
|
* @file
|
|
|
|
* miscellaneous math routines and tables
|
|
|
|
*/
|
|
|
|
|
|
|
|
#include <stdint.h>
|
|
|
|
#include <limits.h>
|
|
|
|
|
|
|
|
#include "avutil.h"
|
|
|
|
#include "mathematics.h"
|
avutil/mathematics: speed up av_gcd by using Stein's binary GCD algorithm
This uses Stein's binary GCD algorithm:
https://en.wikipedia.org/wiki/Binary_GCD_algorithm
to get a roughly 4x speedup over Euclidean GCD on standard architectures
with a compiler intrinsic for ctzll, and a roughly 2x speedup otherwise.
At the moment, the compiler intrinsic is used on GCC and Clang due to
its easy availability.
Quick note regarding overflow: yes, subtractions on int64_t can overflow, but the
llabs takes care of that. The llabs is also guaranteed to be safe, with
no annoying INT64_MIN business since INT64_MIN being a power of 2, is
shifted down before being sent to llabs.
The binary GCD needs ff_ctzll, an extension of ff_ctz for long long (int64_t). On
GCC, this is provided by a built-in. On Microsoft, there is a
BitScanForward64 analog of BitScanForward that should work; but I can't confirm.
Apparently it is not available on 32 bit builds; so this may or may not
work correctly. On Intel, per the documentation there is only an
intrinsic for _bit_scan_forward and people have posted on forums
regarding _bit_scan_forward64, but often their documentation is
woeful. Again, I don't have it, so I can't test.
As such, to be safe, for now only the GCC/Clang intrinsic is added, the rest
use a compiled version based on the De-Bruijn method of Leiserson et al:
http://supertech.csail.mit.edu/papers/debruijn.pdf.
Tested with FATE, sample benchmark (x86-64, GCC 5.2.0, Haswell)
with a START_TIMER and STOP_TIMER in libavutil/rational.c, followed by a
make fate.
aac-am00_88.err:
builtin:
714 decicycles in av_gcd, 4095 runs, 1 skips
de-bruijn:
1440 decicycles in av_gcd, 4096 runs, 0 skips
previous:
2889 decicycles in av_gcd, 4096 runs, 0 skips
Signed-off-by: Ganesh Ajjanagadde <gajjanagadde@gmail.com>
Signed-off-by: Michael Niedermayer <michael@niedermayer.cc>
9 years ago
|
|
|
#include "libavutil/intmath.h"
|
|
|
|
#include "libavutil/common.h"
|
|
|
|
#include "avassert.h"
|
|
|
|
|
avutil/mathematics: speed up av_gcd by using Stein's binary GCD algorithm
This uses Stein's binary GCD algorithm:
https://en.wikipedia.org/wiki/Binary_GCD_algorithm
to get a roughly 4x speedup over Euclidean GCD on standard architectures
with a compiler intrinsic for ctzll, and a roughly 2x speedup otherwise.
At the moment, the compiler intrinsic is used on GCC and Clang due to
its easy availability.
Quick note regarding overflow: yes, subtractions on int64_t can overflow, but the
llabs takes care of that. The llabs is also guaranteed to be safe, with
no annoying INT64_MIN business since INT64_MIN being a power of 2, is
shifted down before being sent to llabs.
The binary GCD needs ff_ctzll, an extension of ff_ctz for long long (int64_t). On
GCC, this is provided by a built-in. On Microsoft, there is a
BitScanForward64 analog of BitScanForward that should work; but I can't confirm.
Apparently it is not available on 32 bit builds; so this may or may not
work correctly. On Intel, per the documentation there is only an
intrinsic for _bit_scan_forward and people have posted on forums
regarding _bit_scan_forward64, but often their documentation is
woeful. Again, I don't have it, so I can't test.
As such, to be safe, for now only the GCC/Clang intrinsic is added, the rest
use a compiled version based on the De-Bruijn method of Leiserson et al:
http://supertech.csail.mit.edu/papers/debruijn.pdf.
Tested with FATE, sample benchmark (x86-64, GCC 5.2.0, Haswell)
with a START_TIMER and STOP_TIMER in libavutil/rational.c, followed by a
make fate.
aac-am00_88.err:
builtin:
714 decicycles in av_gcd, 4095 runs, 1 skips
de-bruijn:
1440 decicycles in av_gcd, 4096 runs, 0 skips
previous:
2889 decicycles in av_gcd, 4096 runs, 0 skips
Signed-off-by: Ganesh Ajjanagadde <gajjanagadde@gmail.com>
Signed-off-by: Michael Niedermayer <michael@niedermayer.cc>
9 years ago
|
|
|
/* Stein's binary GCD algorithm:
|
|
|
|
* https://en.wikipedia.org/wiki/Binary_GCD_algorithm */
|
|
|
|
int64_t av_gcd(int64_t a, int64_t b) {
|
|
|
|
int za, zb, k;
|
|
|
|
int64_t u, v;
|
|
|
|
if (a == 0)
|
|
|
|
return b;
|
|
|
|
if (b == 0)
|
|
|
|
return a;
|
avutil/mathematics: speed up av_gcd by using Stein's binary GCD algorithm
This uses Stein's binary GCD algorithm:
https://en.wikipedia.org/wiki/Binary_GCD_algorithm
to get a roughly 4x speedup over Euclidean GCD on standard architectures
with a compiler intrinsic for ctzll, and a roughly 2x speedup otherwise.
At the moment, the compiler intrinsic is used on GCC and Clang due to
its easy availability.
Quick note regarding overflow: yes, subtractions on int64_t can, but the
llabs takes care of that. The llabs is also guaranteed to be safe, with
no annoying INT64_MIN business since INT64_MIN being a power of 2, is
shifted down before being sent to llabs.
The binary GCD needs ff_ctzll, an extension of ff_ctz for long long (int64_t). On
GCC, this is provided by a built-in. On Microsoft, there is a
BitScanForward64 analog of BitScanForward that should work; but I can't confirm.
Apparently it is not available on 32 bit builds; so this may or may not
work correctly. On Intel, per the documentation there is only an
intrinsic for _bit_scan_forward and people have posted on forums
regarding _bit_scan_forward64, but often their documentation is
woeful. Again, I don't have it, so I can't test.
As such, to be safe, for now only the GCC/Clang intrinsic is added, the rest
use a compiled version based on the De-Bruijn method of Leiserson et al:
http://supertech.csail.mit.edu/papers/debruijn.pdf.
Tested with FATE, sample benchmark (x86-64, GCC 5.2.0, Haswell)
with a START_TIMER and STOP_TIMER in libavutil/rationsl.c, followed by a
make fate.
aac-am00_88.err:
builtin:
714 decicycles in av_gcd, 4095 runs, 1 skips
de-bruijn:
1440 decicycles in av_gcd, 4096 runs, 0 skips
previous:
2889 decicycles in av_gcd, 4096 runs, 0 skips
Signed-off-by: Ganesh Ajjanagadde <gajjanagadde@gmail.com>
Signed-off-by: Michael Niedermayer <michael@niedermayer.cc>
9 years ago
|
|
|
za = ff_ctzll(a);
|
|
|
|
zb = ff_ctzll(b);
|
|
|
|
k = FFMIN(za, zb);
|
|
|
|
u = llabs(a >> za);
|
|
|
|
v = llabs(b >> zb);
|
|
|
|
while (u != v) {
|
|
|
|
if (u > v)
|
|
|
|
FFSWAP(int64_t, v, u);
|
|
|
|
v -= u;
|
|
|
|
v >>= ff_ctzll(v);
|
|
|
|
}
|
avutil/mathematics: make av_gcd more robust
This ensures that no undefined behavior is invoked, while retaining
identical return values in all cases and at no loss of performance
(identical asm on clang and gcc).
Essentially, this patch exchanges undefined behavior with implementation
defined behavior, a strict improvement.
Rationale:
1. The ideal solution is to have the return type a uint64_t. This
unfortunately requires an API change.
2. The only pathological behavior happens if both arguments are
INT64_MIN, to the best of my knowledge. In such a case, the
implementation defined behavior is invoked in the sense that UINT64_MAX
is interpreted as INT64_MIN, which any reasonable implementation will
do. In any case, any usage where both arguments are INT64_MIN is a
fuzzer anyway.
3. Alternatives of checking, etc require branching and lose performance
for no concrete gain - no client cares about av_gcd's actual value when
both args are INT64_MIN. Even if it did, on sane platforms (e.g all the
ones FFmpeg cares about), it produces a correct gcd, namely INT64_MIN.
Reviewed-by: Michael Niedermayer <michael@niedermayer.cc>
Signed-off-by: Ganesh Ajjanagadde <gajjanagadde@gmail.com>
9 years ago
|
|
|
return (uint64_t)u << k;
|
|
|
|
}
|
|
|
|
|
|
|
|
int64_t av_rescale_rnd(int64_t a, int64_t b, int64_t c, enum AVRounding rnd)
|
|
|
|
{
|
|
|
|
int64_t r = 0;
|
|
|
|
av_assert2(c > 0);
|
|
|
|
av_assert2(b >=0);
|
|
|
|
av_assert2((unsigned)(rnd&~AV_ROUND_PASS_MINMAX)<=5 && (rnd&~AV_ROUND_PASS_MINMAX)!=4);
|
|
|
|
|
|
|
|
if (c <= 0 || b < 0 || !((unsigned)(rnd&~AV_ROUND_PASS_MINMAX)<=5 && (rnd&~AV_ROUND_PASS_MINMAX)!=4))
|
|
|
|
return INT64_MIN;
|
|
|
|
|
|
|
|
if (rnd & AV_ROUND_PASS_MINMAX) {
|
|
|
|
if (a == INT64_MIN || a == INT64_MAX)
|
|
|
|
return a;
|
|
|
|
rnd -= AV_ROUND_PASS_MINMAX;
|
|
|
|
}
|
|
|
|
|
|
|
|
if (a < 0)
|
|
|
|
return -(uint64_t)av_rescale_rnd(-FFMAX(a, -INT64_MAX), b, c, rnd ^ ((rnd >> 1) & 1));
|
|
|
|
|
|
|
|
if (rnd == AV_ROUND_NEAR_INF)
|
|
|
|
r = c / 2;
|
|
|
|
else if (rnd & 1)
|
|
|
|
r = c - 1;
|
|
|
|
|
|
|
|
if (b <= INT_MAX && c <= INT_MAX) {
|
|
|
|
if (a <= INT_MAX)
|
|
|
|
return (a * b + r) / c;
|
|
|
|
else {
|
|
|
|
int64_t ad = a / c;
|
|
|
|
int64_t a2 = (a % c * b + r) / c;
|
|
|
|
if (ad >= INT32_MAX && b && ad > (INT64_MAX - a2) / b)
|
|
|
|
return INT64_MIN;
|
|
|
|
return ad * b + a2;
|
|
|
|
}
|
|
|
|
} else {
|
|
|
|
#if 1
|
|
|
|
uint64_t a0 = a & 0xFFFFFFFF;
|
|
|
|
uint64_t a1 = a >> 32;
|
|
|
|
uint64_t b0 = b & 0xFFFFFFFF;
|
|
|
|
uint64_t b1 = b >> 32;
|
|
|
|
uint64_t t1 = a0 * b1 + a1 * b0;
|
|
|
|
uint64_t t1a = t1 << 32;
|
|
|
|
int i;
|
|
|
|
|
|
|
|
a0 = a0 * b0 + t1a;
|
|
|
|
a1 = a1 * b1 + (t1 >> 32) + (a0 < t1a);
|
|
|
|
a0 += r;
|
|
|
|
a1 += a0 < r;
|
|
|
|
|
|
|
|
for (i = 63; i >= 0; i--) {
|
|
|
|
a1 += a1 + ((a0 >> i) & 1);
|
|
|
|
t1 += t1;
|
|
|
|
if (c <= a1) {
|
|
|
|
a1 -= c;
|
|
|
|
t1++;
|
|
|
|
}
|
|
|
|
}
|
|
|
|
if (t1 > INT64_MAX)
|
|
|
|
return INT64_MIN;
|
|
|
|
return t1;
|
|
|
|
#else
|
|
|
|
/* reference code doing (a*b + r) / c, requires libavutil/integer.h */
|
|
|
|
AVInteger ai;
|
|
|
|
ai = av_mul_i(av_int2i(a), av_int2i(b));
|
|
|
|
ai = av_add_i(ai, av_int2i(r));
|
|
|
|
|
|
|
|
return av_i2int(av_div_i(ai, av_int2i(c)));
|
|
|
|
#endif
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
|
|
|
int64_t av_rescale(int64_t a, int64_t b, int64_t c)
|
|
|
|
{
|
|
|
|
return av_rescale_rnd(a, b, c, AV_ROUND_NEAR_INF);
|
|
|
|
}
|
|
|
|
|
|
|
|
/*
 * Compute a * bq / cq with the given rounding mode.
 *
 * The two rationals are folded into a single integer ratio b / c by
 * cross-multiplication (done in 64 bits so the int products cannot
 * overflow), which is then handed to av_rescale_rnd().
 */
int64_t av_rescale_q_rnd(int64_t a, AVRational bq, AVRational cq,
                         enum AVRounding rnd)
{
    int64_t b = bq.num * (int64_t)cq.den;
    int64_t c = cq.num * (int64_t)bq.den;
    return av_rescale_rnd(a, b, c, rnd);
}
|
|
|
|
|
|
|
|
/* Compute a * bq / cq through av_rescale_q_rnd() using AV_ROUND_NEAR_INF. */
int64_t av_rescale_q(int64_t a, AVRational bq, AVRational cq)
{
    return av_rescale_q_rnd(a, bq, cq, AV_ROUND_NEAR_INF);
}
|
|
|
|
|
|
|
|
/*
 * Compare timestamp ts_a (in time base tb_a) with ts_b (in time base tb_b).
 *
 * Returns -1 if ts_a is earlier, 1 if it is later, 0 otherwise.
 */
int av_compare_ts(int64_t ts_a, AVRational tb_a, int64_t ts_b, AVRational tb_b)
{
    /* Cross-multiplied scale factors: ts_a * a and ts_b * b are directly
     * comparable. */
    int64_t a = tb_a.num * (int64_t)tb_b.den;
    int64_t b = tb_b.num * (int64_t)tb_a.den;
    /* Fast path: everything is small enough that the products fit in
     * 64 bits, so compare them directly. */
    if ((FFABS64U(ts_a)|a|FFABS64U(ts_b)|b) <= INT_MAX)
        return (ts_a*a > ts_b*b) - (ts_a*a < ts_b*b);
    /* Otherwise convert each timestamp into the other's units, rounding
     * down, and compare there. */
    if (av_rescale_rnd(ts_a, a, b, AV_ROUND_DOWN) < ts_b)
        return -1;
    if (av_rescale_rnd(ts_b, b, a, AV_ROUND_DOWN) < ts_a)
        return 1;
    return 0;
}
|
|
|
|
|
|
|
|
/*
 * Signed difference a - b for values that wrap around modulo mod.
 *
 * The difference is reduced by masking with (mod - 1), which only acts as
 * a modulo when mod is a power of two (presumably a caller requirement).
 * Differences larger than mod / 2 are folded to the negative side, so the
 * result lies in (-mod / 2, mod / 2].
 */
int64_t av_compare_mod(uint64_t a, uint64_t b, uint64_t mod)
{
    int64_t c = (a - b) & (mod - 1);
    if (c > (mod >> 1))
        c -= mod;
    return c;
}
|
|
|
|
|
|
|
|
/*
 * Rescale in_ts from in_tb to out_tb while tracking the running position
 * in the finer fs_tb units, so that rounding errors do not accumulate
 * across successive calls.
 *
 * *last holds the expected position (in fs_tb units) of the next call and
 * is updated on every call; duration is added to it in fs_tb units.
 * in_ts must not be AV_NOPTS_VALUE and duration must be non-negative.
 */
int64_t av_rescale_delta(AVRational in_tb, int64_t in_ts, AVRational fs_tb, int duration, int64_t *last, AVRational out_tb){
    int64_t a, b, this;

    av_assert0(in_ts != AV_NOPTS_VALUE);
    av_assert0(duration >= 0);

    /* No history, no duration, or in_tb <= out_tb (by cross-multiplication):
     * do a plain rescale and restart the tracking from in_ts. */
    if (*last == AV_NOPTS_VALUE || !duration || in_tb.num*(int64_t)out_tb.den <= out_tb.num*(int64_t)in_tb.den) {
simple_round:
        *last = av_rescale_q(in_ts, in_tb, fs_tb) + duration;
        return av_rescale_q(in_ts, in_tb, out_tb);
    }

    /* [a, b] is the range of fs_tb values corresponding to in_ts -/+ half
     * an in_tb tick. */
    a =  av_rescale_q_rnd(2*in_ts-1, in_tb, fs_tb, AV_ROUND_DOWN)   >>1;
    b = (av_rescale_q_rnd(2*in_ts+1, in_tb, fs_tb, AV_ROUND_UP  )+1)>>1;
    /* The tracked position is too far outside that range: resynchronize. */
    if (*last < 2*a - b || *last > 2*b - a)
        goto simple_round;

    this = av_clip64(*last, a, b);
    *last = this + duration;

    return av_rescale_q(this, fs_tb, out_tb);
}
|
|
|
|
|
|
|
|
/*
 * Add inc (expressed in inc_tb units) to the timestamp ts (expressed in
 * ts_tb units) and return the sum in ts_tb units.
 *
 * When the increment is not a whole number of ts_tb ticks, the addition is
 * carried out in inc_tb units and the sub-tick offset of ts is re-applied.
 */
int64_t av_add_stable(AVRational ts_tb, int64_t ts, AVRational inc_tb, int64_t inc)
{
    int64_t m, d;

    /* Fold the increment count into the time base so that the step is
     * exactly one inc_tb unit from here on. */
    if (inc != 1)
        inc_tb = av_mul_q(inc_tb, (AVRational) {inc, 1});

    /* m / d is the step expressed in ts_tb ticks.
     * NOTE(review): d is used as a divisor without a zero check; this
     * assumes ts_tb.num and inc_tb.den are non-zero - confirm with callers. */
    m = inc_tb.num * (int64_t)ts_tb.den;
    d = inc_tb.den * (int64_t)ts_tb.num;

    /* Exact case: the step is a whole number of ticks and the sum fits. */
    if (m % d == 0 && ts <= INT64_MAX - m / d)
        return ts + m / d;
    /* The step is smaller than one tick: ts is returned unchanged. */
    if (m < d)
        return ts;

    {
        int64_t old    = av_rescale_q(ts, ts_tb, inc_tb);
        int64_t old_ts = av_rescale_q(old, inc_tb, ts_tb);

        if (old == INT64_MAX || old == AV_NOPTS_VALUE || old_ts == AV_NOPTS_VALUE)
            return ts;

        /* Advance by one inc_tb unit, then add back what the round trip
         * through inc_tb lost (ts - old_ts), saturating on overflow. */
        return av_sat_add64(av_rescale_q(old + 1, inc_tb, ts_tb), ts - old_ts);
    }
}
|
|
|
|
|
|
|
|
/*
 * Evaluate the polynomial
 *     coeff[0] + coeff[1] * x + ... + coeff[size - 1] * x^(size - 1)
 * with Horner's scheme.
 *
 * An empty coefficient list (size <= 0) evaluates to 0.0 instead of reading
 * in front of the array.
 */
static inline double eval_poly(const double *coeff, int size, double x) {
    double sum;
    int i;
    if (size <= 0)
        return 0.0;
    /* Start from the highest-order coefficient and work downwards. */
    sum = coeff[size-1];
    for (i = size-2; i >= 0; --i) {
        sum *= x;
        sum += coeff[i];
    }
    return sum;
}
|
|
|
|
|
|
|
|
/**
|
|
|
|
* 0th order modified bessel function of the first kind.
|
|
|
|
* Algorithm taken from the Boost project, source:
|
|
|
|
* https://searchcode.com/codesearch/view/14918379/
|
|
|
|
* Use, modification and distribution are subject to the
|
|
|
|
* Boost Software License, Version 1.0 (see notice below).
|
|
|
|
* Boost Software License - Version 1.0 - August 17th, 2003
|
|
|
|
Permission is hereby granted, free of charge, to any person or organization
|
|
|
|
obtaining a copy of the software and accompanying documentation covered by
|
|
|
|
this license (the "Software") to use, reproduce, display, distribute,
|
|
|
|
execute, and transmit the Software, and to prepare derivative works of the
|
|
|
|
Software, and to permit third-parties to whom the Software is furnished to
|
|
|
|
do so, all subject to the following:
|
|
|
|
|
|
|
|
The copyright notices in the Software and this entire statement, including
|
|
|
|
the above license grant, this restriction and the following disclaimer,
|
|
|
|
must be included in all copies of the Software, in whole or in part, and
|
|
|
|
all derivative works of the Software, unless such copies or derivative
|
|
|
|
works are solely in the form of machine-executable object code generated by
|
|
|
|
a source language processor.
|
|
|
|
|
|
|
|
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
|
|
|
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
|
|
|
FITNESS FOR A PARTICULAR PURPOSE, TITLE AND NON-INFRINGEMENT. IN NO EVENT
|
|
|
|
SHALL THE COPYRIGHT HOLDERS OR ANYONE DISTRIBUTING THE SOFTWARE BE LIABLE
|
|
|
|
FOR ANY DAMAGES OR OTHER LIABILITY, WHETHER IN CONTRACT, TORT OR OTHERWISE,
|
|
|
|
ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
|
|
|
|
DEALINGS IN THE SOFTWARE.
|
|
|
|
*/
|
|
|
|
|
|
|
|
double av_bessel_i0(double x) {
    // Modified Bessel function of the first kind of order zero
    // minimax rational approximations on intervals, see
    // Blair and Edwards, Chalk River Report AECL-4928, 1974
    //
    // Each table lists polynomial coefficients in ascending order of power
    // (constant term first), as consumed by eval_poly().

    // numerator / denominator used for |x| <= 15, evaluated at y = x^2
    static const double p1[] = {
        -2.2335582639474375249e+15,
        -5.5050369673018427753e+14,
        -3.2940087627407749166e+13,
        -8.4925101247114157499e+11,
        -1.1912746104985237192e+10,
        -1.0313066708737980747e+08,
        -5.9545626019847898221e+05,
        -2.4125195876041896775e+03,
        -7.0935347449210549190e+00,
        -1.5453977791786851041e-02,
        -2.5172644670688975051e-05,
        -3.0517226450451067446e-08,
        -2.6843448573468483278e-11,
        -1.5982226675653184646e-14,
        -5.2487866627945699800e-18,
    };
    static const double q1[] = {
        -2.2335582639474375245e+15,
        7.8858692566751002988e+12,
        -1.2207067397808979846e+10,
        1.0377081058062166144e+07,
        -4.8527560179962773045e+03,
        1.0,
    };
    // numerator / denominator used for |x| > 15, evaluated at
    // y = 1/|x| - 1/15
    static const double p2[] = {
        -2.2210262233306573296e-04,
        1.3067392038106924055e-02,
        -4.4700805721174453923e-01,
        5.5674518371240761397e+00,
        -2.3517945679239481621e+01,
        3.1611322818701131207e+01,
        -9.6090021968656180000e+00,
    };
    static const double q2[] = {
        -5.5194330231005480228e-04,
        3.2547697594819615062e-02,
        -1.1151759188741312645e+00,
        1.3982595353892851542e+01,
        -6.0228002066743340583e+01,
        8.5539563258012929600e+01,
        -3.1446690275135491500e+01,
        1.0,
    };
    double y, r, factor;
    if (x == 0)
        return 1.0;
    // the sign of x is dropped before evaluation
    x = fabs(x);
    if (x <= 15) {
        y = x * x;
        return eval_poly(p1, FF_ARRAY_ELEMS(p1), y) / eval_poly(q1, FF_ARRAY_ELEMS(q1), y);
    }
    else {
        y = 1 / x - 1.0 / 15;
        r = eval_poly(p2, FF_ARRAY_ELEMS(p2), y) / eval_poly(q2, FF_ARRAY_ELEMS(q2), y);
        // the rational correction r is scaled by exp(x) / sqrt(x)
        factor = exp(x) / sqrt(x);
        return factor * r;
    }
}
|