|
|
|
/*
|
|
|
|
* Copyright (c) 2005-2012 Michael Niedermayer <michaelni@gmx.at>
|
|
|
|
*
|
|
|
|
* This file is part of FFmpeg.
|
|
|
|
*
|
|
|
|
* FFmpeg is free software; you can redistribute it and/or
|
|
|
|
* modify it under the terms of the GNU Lesser General Public
|
|
|
|
* License as published by the Free Software Foundation; either
|
|
|
|
* version 2.1 of the License, or (at your option) any later version.
|
|
|
|
*
|
|
|
|
* FFmpeg is distributed in the hope that it will be useful,
|
|
|
|
* but WITHOUT ANY WARRANTY; without even the implied warranty of
|
|
|
|
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
|
|
|
|
* Lesser General Public License for more details.
|
|
|
|
*
|
|
|
|
* You should have received a copy of the GNU Lesser General Public
|
|
|
|
* License along with FFmpeg; if not, write to the Free Software
|
|
|
|
* Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
|
|
|
|
*/
|
|
|
|
|
|
|
|
/**
|
|
|
|
* @file
|
|
|
|
* miscellaneous math routines and tables
|
|
|
|
*/
|
|
|
|
|
|
|
|
#include <stdint.h>
|
|
|
|
#include <limits.h>
|
|
|
|
|
|
|
|
#include "mathematics.h"
|
avutil/mathematics: speed up av_gcd by using Stein's binary GCD algorithm
This uses Stein's binary GCD algorithm:
https://en.wikipedia.org/wiki/Binary_GCD_algorithm
to get a roughly 4x speedup over Euclidean GCD on standard architectures
with a compiler intrinsic for ctzll, and a roughly 2x speedup otherwise.
At the moment, the compiler intrinsic is used on GCC and Clang due to
its easy availability.
Quick note regarding overflow: yes, subtractions on int64_t can overflow, but the
llabs takes care of that. The llabs is also guaranteed to be safe, with
no annoying INT64_MIN business since INT64_MIN being a power of 2, is
shifted down before being sent to llabs.
The binary GCD needs ff_ctzll, an extension of ff_ctz for long long (int64_t). On
GCC, this is provided by a built-in. On Microsoft, there is a
BitScanForward64 analog of BitScanForward that should work; but I can't confirm.
Apparently it is not available on 32 bit builds; so this may or may not
work correctly. On Intel, per the documentation there is only an
intrinsic for _bit_scan_forward and people have posted on forums
regarding _bit_scan_forward64, but often their documentation is
woeful. Again, I don't have it, so I can't test.
As such, to be safe, for now only the GCC/Clang intrinsic is added, the rest
use a compiled version based on the De-Bruijn method of Leiserson et al:
http://supertech.csail.mit.edu/papers/debruijn.pdf.
Tested with FATE, sample benchmark (x86-64, GCC 5.2.0, Haswell)
with a START_TIMER and STOP_TIMER in libavutil/rational.c, followed by a
make fate.
aac-am00_88.err:
builtin:
714 decicycles in av_gcd, 4095 runs, 1 skips
de-bruijn:
1440 decicycles in av_gcd, 4096 runs, 0 skips
previous:
2889 decicycles in av_gcd, 4096 runs, 0 skips
Signed-off-by: Ganesh Ajjanagadde <gajjanagadde@gmail.com>
Signed-off-by: Michael Niedermayer <michael@niedermayer.cc>
9 years ago
|
|
|
#include "libavutil/intmath.h"
|
|
|
|
#include "libavutil/common.h"
|
|
|
|
#include "avassert.h"
|
|
|
|
#include "version.h"
|
|
|
|
|
avutil/mathematics: speed up av_gcd by using Stein's binary GCD algorithm
This uses Stein's binary GCD algorithm:
https://en.wikipedia.org/wiki/Binary_GCD_algorithm
to get a roughly 4x speedup over Euclidean GCD on standard architectures
with a compiler intrinsic for ctzll, and a roughly 2x speedup otherwise.
At the moment, the compiler intrinsic is used on GCC and Clang due to
its easy availability.
Quick note regarding overflow: yes, subtractions on int64_t can overflow, but the
llabs takes care of that. The llabs is also guaranteed to be safe, with
no annoying INT64_MIN business since INT64_MIN being a power of 2, is
shifted down before being sent to llabs.
The binary GCD needs ff_ctzll, an extension of ff_ctz for long long (int64_t). On
GCC, this is provided by a built-in. On Microsoft, there is a
BitScanForward64 analog of BitScanForward that should work; but I can't confirm.
Apparently it is not available on 32 bit builds; so this may or may not
work correctly. On Intel, per the documentation there is only an
intrinsic for _bit_scan_forward and people have posted on forums
regarding _bit_scan_forward64, but often their documentation is
woeful. Again, I don't have it, so I can't test.
As such, to be safe, for now only the GCC/Clang intrinsic is added, the rest
use a compiled version based on the De-Bruijn method of Leiserson et al:
http://supertech.csail.mit.edu/papers/debruijn.pdf.
Tested with FATE, sample benchmark (x86-64, GCC 5.2.0, Haswell)
with a START_TIMER and STOP_TIMER in libavutil/rational.c, followed by a
make fate.
aac-am00_88.err:
builtin:
714 decicycles in av_gcd, 4095 runs, 1 skips
de-bruijn:
1440 decicycles in av_gcd, 4096 runs, 0 skips
previous:
2889 decicycles in av_gcd, 4096 runs, 0 skips
Signed-off-by: Ganesh Ajjanagadde <gajjanagadde@gmail.com>
Signed-off-by: Michael Niedermayer <michael@niedermayer.cc>
9 years ago
|
|
|
/**
 * Greatest common divisor via Stein's binary GCD algorithm:
 * https://en.wikipedia.org/wiki/Binary_GCD_algorithm
 *
 * If either argument is 0 the other one is returned unchanged (sign
 * included). Otherwise both operands are stripped of their trailing zero
 * bits, made non-negative, and reduced by repeated subtract-and-shift until
 * they are equal; the common power of two is then restored.
 *
 * @param a first operand
 * @param b second operand
 * @return  the GCD of a and b
 */
int64_t av_gcd(int64_t a, int64_t b) {
    int tz_a, tz_b, common_shift;
    int64_t small, big;

    if (!a)
        return b;
    if (!b)
        return a;

    tz_a         = ff_ctzll(a);
    tz_b         = ff_ctzll(b);
    common_shift = tz_a < tz_b ? tz_a : tz_b;

    /* The trailing zeros are shifted out before llabs(), so INT64_MIN
     * (a power of two) is reduced to -1 and never reaches llabs() as is. */
    small = llabs(a >> tz_a);
    big   = llabs(b >> tz_b);

    while (small != big) {
        if (small > big) {
            /* keep small <= big so the subtraction below stays non-negative */
            int64_t tmp = big;
            big   = small;
            small = tmp;
        }
        big  -= small;
        big >>= ff_ctzll(big);
    }

    /* Shift as unsigned: this trades undefined behavior of a signed left
     * shift for an implementation-defined conversion back to int64_t. */
    return (uint64_t)small << common_shift;
}
|
|
|
|
|
|
|
|
int64_t av_rescale_rnd(int64_t a, int64_t b, int64_t c, enum AVRounding rnd)
|
|
|
|
{
|
|
|
|
int64_t r = 0;
|
|
|
|
av_assert2(c > 0);
|
|
|
|
av_assert2(b >=0);
|
|
|
|
av_assert2((unsigned)(rnd&~AV_ROUND_PASS_MINMAX)<=5 && (rnd&~AV_ROUND_PASS_MINMAX)!=4);
|
|
|
|
|
|
|
|
if (c <= 0 || b < 0 || !((unsigned)(rnd&~AV_ROUND_PASS_MINMAX)<=5 && (rnd&~AV_ROUND_PASS_MINMAX)!=4))
|
|
|
|
return INT64_MIN;
|
|
|
|
|
|
|
|
if (rnd & AV_ROUND_PASS_MINMAX) {
|
|
|
|
if (a == INT64_MIN || a == INT64_MAX)
|
|
|
|
return a;
|
|
|
|
rnd -= AV_ROUND_PASS_MINMAX;
|
|
|
|
}
|
|
|
|
|
|
|
|
if (a < 0)
|
|
|
|
return -(uint64_t)av_rescale_rnd(-FFMAX(a, -INT64_MAX), b, c, rnd ^ ((rnd >> 1) & 1));
|
|
|
|
|
|
|
|
if (rnd == AV_ROUND_NEAR_INF)
|
|
|
|
r = c / 2;
|
|
|
|
else if (rnd & 1)
|
|
|
|
r = c - 1;
|
|
|
|
|
|
|
|
if (b <= INT_MAX && c <= INT_MAX) {
|
|
|
|
if (a <= INT_MAX)
|
|
|
|
return (a * b + r) / c;
|
|
|
|
else {
|
|
|
|
int64_t ad = a / c;
|
|
|
|
int64_t a2 = (a % c * b + r) / c;
|
|
|
|
if (ad >= INT32_MAX && b && ad > (INT64_MAX - a2) / b)
|
|
|
|
return INT64_MIN;
|
|
|
|
return ad * b + a2;
|
|
|
|
}
|
|
|
|
} else {
|
|
|
|
#if 1
|
|
|
|
uint64_t a0 = a & 0xFFFFFFFF;
|
|
|
|
uint64_t a1 = a >> 32;
|
|
|
|
uint64_t b0 = b & 0xFFFFFFFF;
|
|
|
|
uint64_t b1 = b >> 32;
|
|
|
|
uint64_t t1 = a0 * b1 + a1 * b0;
|
|
|
|
uint64_t t1a = t1 << 32;
|
|
|
|
int i;
|
|
|
|
|
|
|
|
a0 = a0 * b0 + t1a;
|
|
|
|
a1 = a1 * b1 + (t1 >> 32) + (a0 < t1a);
|
|
|
|
a0 += r;
|
|
|
|
a1 += a0 < r;
|
|
|
|
|
|
|
|
for (i = 63; i >= 0; i--) {
|
|
|
|
a1 += a1 + ((a0 >> i) & 1);
|
|
|
|
t1 += t1;
|
|
|
|
if (c <= a1) {
|
|
|
|
a1 -= c;
|
|
|
|
t1++;
|
|
|
|
}
|
|
|
|
}
|
|
|
|
if (t1 > INT64_MAX)
|
|
|
|
return INT64_MIN;
|
|
|
|
return t1;
|
|
|
|
#else
|
|
|
|
/* reference code doing (a*b + r) / c, requires libavutil/integer.h */
|
|
|
|
AVInteger ai;
|
|
|
|
ai = av_mul_i(av_int2i(a), av_int2i(b));
|
|
|
|
ai = av_add_i(ai, av_int2i(r));
|
|
|
|
|
|
|
|
return av_i2int(av_div_i(ai, av_int2i(c)));
|
|
|
|
#endif
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
|
|
|
int64_t av_rescale(int64_t a, int64_t b, int64_t c)
|
|
|
|
{
|
|
|
|
return av_rescale_rnd(a, b, c, AV_ROUND_NEAR_INF);
|
|
|
|
}
|
|
|
|
|
|
|
|
/**
 * Rescale a by the ratio of two rationals, bq / cq, with an explicit
 * rounding mode.
 *
 * The ratio is expanded to (bq.num * cq.den) / (cq.num * bq.den) and handed
 * to av_rescale_rnd(). One factor of each product is widened to int64_t so
 * the multiplication is carried out in 64 bits.
 *
 * @return the result of av_rescale_rnd() on the expanded ratio
 */
int64_t av_rescale_q_rnd(int64_t a, AVRational bq, AVRational cq,
                         enum AVRounding rnd)
{
    const int64_t scale_num = (int64_t)cq.den * bq.num;
    const int64_t scale_den = (int64_t)bq.den * cq.num;

    return av_rescale_rnd(a, scale_num, scale_den, rnd);
}
|
|
|
|
|
|
|
|
/**
 * Rescale a by the ratio bq / cq using av_rescale_q_rnd() with the
 * AV_ROUND_NEAR_INF rounding mode.
 *
 * @return whatever av_rescale_q_rnd(a, bq, cq, AV_ROUND_NEAR_INF) returns
 */
int64_t av_rescale_q(int64_t a, AVRational bq, AVRational cq)
{
    const enum AVRounding mode = AV_ROUND_NEAR_INF;

    return av_rescale_q_rnd(a, bq, cq, mode);
}
|
|
|
|
|
|
|
|
/**
 * Compare two timestamps, each expressed in its own time base.
 *
 * Both sides are brought to a common scale by cross-multiplying the time
 * bases: ts_a is weighted by a = tb_a.num * tb_b.den and ts_b by
 * b = tb_b.num * tb_a.den.
 *
 * @param ts_a first timestamp
 * @param tb_a time base of ts_a
 * @param ts_b second timestamp
 * @param tb_b time base of ts_b
 * @return -1 if ts_a is before ts_b, 1 if it is after, 0 otherwise
 */
int av_compare_ts(int64_t ts_a, AVRational tb_a, int64_t ts_b, AVRational tb_b)
{
    const int64_t a = tb_a.num * (int64_t)tb_b.den;
    const int64_t b = tb_b.num * (int64_t)tb_a.den;

    /* Magnitudes are taken in unsigned arithmetic: negating INT64_MIN as a
     * signed value overflows, which is undefined behavior and in practice
     * let such timestamps slip into the fast path below. */
    const uint64_t mag_a = ts_a < 0 ? -(uint64_t)ts_a : (uint64_t)ts_a;
    const uint64_t mag_b = ts_b < 0 ? -(uint64_t)ts_b : (uint64_t)ts_b;

    /* Fast path: every operand fits in 31 bits, so the 64-bit products
     * cannot overflow. Because the test is unsigned, a negative a or b
     * (sign bit set) also fails it and is sent to the checked slow path. */
    if ((mag_a | (uint64_t)a | mag_b | (uint64_t)b) <= INT_MAX)
        return (ts_a * a > ts_b * b) - (ts_a * a < ts_b * b);

    /* Slow path: convert each timestamp into the other's scale, rounding
     * down, and compare against the untouched one. */
    if (av_rescale_rnd(ts_a, a, b, AV_ROUND_DOWN) < ts_b)
        return -1;
    if (av_rescale_rnd(ts_b, b, a, AV_ROUND_DOWN) < ts_a)
        return 1;
    return 0;
}
|
|
|
|
|
|
|
|
/**
 * Signed difference a - b taken modulo mod.
 *
 * The difference is masked with (mod - 1), so the masking only acts as a
 * modulo reduction when mod is a power of two. Results above mod / 2 are
 * wrapped into the negative range by subtracting mod.
 *
 * @param a   first value
 * @param b   second value
 * @param mod modulus used for the wrap-around
 * @return    the wrapped difference, at most mod / 2
 */
int64_t av_compare_mod(uint64_t a, uint64_t b, uint64_t mod)
{
    const uint64_t half = mod >> 1;
    int64_t diff = (a - b) & (mod - 1);

    /* The comparison is carried out on unsigned 64-bit values. */
    if ((uint64_t)diff > half)
        diff -= mod;

    return diff;
}
|
|
|
|
|
|
|
|
/**
 * Rescale in_ts from in_tb to out_tb while tracking a running timestamp in
 * the fs_tb time base through *last.
 *
 * A plain av_rescale_q() conversion is used (and *last is re-seeded from
 * in_ts) when *last is AV_NOPTS_VALUE, when duration is 0, when in_tb is not
 * larger than out_tb, or when *last lies outside the tolerance window
 * derived from in_ts. Otherwise *last is clipped to the interval that in_ts
 * maps to in fs_tb, advanced by duration, and the clipped value is what
 * gets converted to out_tb.
 *
 * @param in_tb    time base of in_ts
 * @param in_ts    input timestamp, must not be AV_NOPTS_VALUE
 * @param fs_tb    time base in which *last and duration are expressed
 * @param duration amount added to *last for the next call, must be >= 0
 * @param last     in/out: running timestamp in fs_tb units
 * @param out_tb   time base of the returned timestamp
 * @return         the timestamp expressed in out_tb
 */
int64_t av_rescale_delta(AVRational in_tb, int64_t in_ts, AVRational fs_tb, int duration, int64_t *last, AVRational out_tb)
{
    int64_t lower = 0, upper = 0, clipped;
    int use_simple;

    av_assert0(in_ts != AV_NOPTS_VALUE);
    av_assert0(duration >= 0);

    use_simple = *last == AV_NOPTS_VALUE || !duration ||
                 in_tb.num * (int64_t)out_tb.den <= out_tb.num * (int64_t)in_tb.den;

    if (!use_simple) {
        /* Bounds, in fs_tb units, of the half-tick interval around in_ts. */
        lower =  av_rescale_q_rnd(2 * in_ts - 1, in_tb, fs_tb, AV_ROUND_DOWN)      >> 1;
        upper = (av_rescale_q_rnd(2 * in_ts + 1, in_tb, fs_tb, AV_ROUND_UP  ) + 1) >> 1;

        /* Too far from the expected position: fall back to plain rounding. */
        use_simple = *last < 2 * lower - upper || *last > 2 * upper - lower;
    }

    if (use_simple) {
        *last = av_rescale_q(in_ts, in_tb, fs_tb) + duration;
        return av_rescale_q(in_ts, in_tb, out_tb);
    }

    clipped = av_clip64(*last, lower, upper);
    *last   = clipped + duration;

    return av_rescale_q(clipped, fs_tb, out_tb);
}
|
|
|
|
|
|
|
|
/**
 * Add inc (expressed in inc_tb) to the timestamp ts (expressed in ts_tb).
 *
 * When the increment is an exact multiple of a ts_tb tick it is added
 * directly. Otherwise ts is converted to the inc_tb grid, advanced by one
 * step there, converted back, and the part of ts that the round trip lost
 * (ts - old_ts) is added again.
 *
 * @param ts_tb  time base of ts
 * @param ts     timestamp to advance
 * @param inc_tb time base of inc
 * @param inc    increment; values other than 1 are folded into inc_tb
 * @return       the advanced timestamp in ts_tb units; ts itself when the
 *               increment is smaller than one ts_tb tick or an intermediate
 *               conversion hit INT64_MAX / AV_NOPTS_VALUE; saturated to
 *               INT64_MIN / INT64_MAX if the final sum is out of range
 */
int64_t av_add_stable(AVRational ts_tb, int64_t ts, AVRational inc_tb, int64_t inc)
{
    int64_t m, d;

    if (inc != 1)
        inc_tb = av_mul_q(inc_tb, (AVRational) {inc, 1});

    /* m / d is the increment measured in ts_tb ticks.
     * NOTE(review): d == 0 (zero ts_tb.num or inc_tb.den) is not guarded
     * here; callers are assumed to pass valid time bases. */
    m = inc_tb.num * (int64_t)ts_tb.den;
    d = inc_tb.den * (int64_t)ts_tb.num;

    if (m % d == 0 && ts <= INT64_MAX - m / d)
        return ts + m / d;
    if (m < d)
        return ts;

    {
        int64_t old    = av_rescale_q(ts, ts_tb, inc_tb);
        int64_t old_ts = av_rescale_q(old, inc_tb, ts_tb);
        int64_t stepped, residue;

        if (old == INT64_MAX || old == AV_NOPTS_VALUE || old_ts == AV_NOPTS_VALUE)
            return ts;

        stepped = av_rescale_q(old + 1, inc_tb, ts_tb);
        residue = ts - old_ts;

        /* Saturate instead of overflowing: signed overflow is undefined. */
        if (residue > 0 && stepped > INT64_MAX - residue)
            return INT64_MAX;
        if (residue < 0 && stepped < INT64_MIN - residue)
            return INT64_MIN;

        return stepped + residue;
    }
}
|