You can not select more than 25 topics Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.

215 lines
6.0 KiB

/*
* Copyright (c) 2005-2012 Michael Niedermayer <michaelni@gmx.at>
*
* This file is part of FFmpeg.
*
* FFmpeg is free software; you can redistribute it and/or
* modify it under the terms of the GNU Lesser General Public
* License as published by the Free Software Foundation; either
* version 2.1 of the License, or (at your option) any later version.
*
* FFmpeg is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
* Lesser General Public License for more details.
*
* You should have received a copy of the GNU Lesser General Public
* License along with FFmpeg; if not, write to the Free Software
* Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
*/
/**
* @file
* miscellaneous math routines and tables
*/
#include <stdint.h>
#include <limits.h>
#include "mathematics.h"
avutil/mathematics: speed up av_gcd by using Stein's binary GCD algorithm This uses Stein's binary GCD algorithm: https://en.wikipedia.org/wiki/Binary_GCD_algorithm to get a roughly 4x speedup over Euclidean GCD on standard architectures with a compiler intrinsic for ctzll, and a roughly 2x speedup otherwise. At the moment, the compiler intrinsic is used on GCC and Clang due to its easy availability. Quick note regarding overflow: yes, subtractions on int64_t can, but the llabs takes care of that. The llabs is also guaranteed to be safe, with no annoying INT64_MIN business since INT64_MIN being a power of 2, is shifted down before being sent to llabs. The binary GCD needs ff_ctzll, an extension of ff_ctz for long long (int64_t). On GCC, this is provided by a built-in. On Microsoft, there is a BitScanForward64 analog of BitScanForward that should work; but I can't confirm. Apparently it is not available on 32 bit builds; so this may or may not work correctly. On Intel, per the documentation there is only an intrinsic for _bit_scan_forward and people have posted on forums regarding _bit_scan_forward64, but often their documentation is woeful. Again, I don't have it, so I can't test. As such, to be safe, for now only the GCC/Clang intrinsic is added, the rest use a compiled version based on the De-Bruijn method of Leiserson et al: http://supertech.csail.mit.edu/papers/debruijn.pdf. Tested with FATE, sample benchmark (x86-64, GCC 5.2.0, Haswell) with a START_TIMER and STOP_TIMER in libavutil/rationsl.c, followed by a make fate. aac-am00_88.err: builtin: 714 decicycles in av_gcd, 4095 runs, 1 skips de-bruijn: 1440 decicycles in av_gcd, 4096 runs, 0 skips previous: 2889 decicycles in av_gcd, 4096 runs, 0 skips Signed-off-by: Ganesh Ajjanagadde <gajjanagadde@gmail.com> Signed-off-by: Michael Niedermayer <michael@niedermayer.cc>
9 years ago
#include "libavutil/intmath.h"
#include "libavutil/common.h"
#include "avassert.h"
avutil/mathematics: speed up av_gcd by using Stein's binary GCD algorithm This uses Stein's binary GCD algorithm: https://en.wikipedia.org/wiki/Binary_GCD_algorithm to get a roughly 4x speedup over Euclidean GCD on standard architectures with a compiler intrinsic for ctzll, and a roughly 2x speedup otherwise. At the moment, the compiler intrinsic is used on GCC and Clang due to its easy availability. Quick note regarding overflow: yes, subtractions on int64_t can, but the llabs takes care of that. The llabs is also guaranteed to be safe, with no annoying INT64_MIN business since INT64_MIN being a power of 2, is shifted down before being sent to llabs. The binary GCD needs ff_ctzll, an extension of ff_ctz for long long (int64_t). On GCC, this is provided by a built-in. On Microsoft, there is a BitScanForward64 analog of BitScanForward that should work; but I can't confirm. Apparently it is not available on 32 bit builds; so this may or may not work correctly. On Intel, per the documentation there is only an intrinsic for _bit_scan_forward and people have posted on forums regarding _bit_scan_forward64, but often their documentation is woeful. Again, I don't have it, so I can't test. As such, to be safe, for now only the GCC/Clang intrinsic is added, the rest use a compiled version based on the De-Bruijn method of Leiserson et al: http://supertech.csail.mit.edu/papers/debruijn.pdf. Tested with FATE, sample benchmark (x86-64, GCC 5.2.0, Haswell) with a START_TIMER and STOP_TIMER in libavutil/rationsl.c, followed by a make fate. aac-am00_88.err: builtin: 714 decicycles in av_gcd, 4095 runs, 1 skips de-bruijn: 1440 decicycles in av_gcd, 4096 runs, 0 skips previous: 2889 decicycles in av_gcd, 4096 runs, 0 skips Signed-off-by: Ganesh Ajjanagadde <gajjanagadde@gmail.com> Signed-off-by: Michael Niedermayer <michael@niedermayer.cc>
9 years ago
/* Stein's binary GCD algorithm:
* https://en.wikipedia.org/wiki/Binary_GCD_algorithm */
int64_t av_gcd(int64_t a, int64_t b) {
int za, zb, k;
int64_t u, v;
if (a == 0)
return b;
if (b == 0)
return a;
avutil/mathematics: speed up av_gcd by using Stein's binary GCD algorithm This uses Stein's binary GCD algorithm: https://en.wikipedia.org/wiki/Binary_GCD_algorithm to get a roughly 4x speedup over Euclidean GCD on standard architectures with a compiler intrinsic for ctzll, and a roughly 2x speedup otherwise. At the moment, the compiler intrinsic is used on GCC and Clang due to its easy availability. Quick note regarding overflow: yes, subtractions on int64_t can, but the llabs takes care of that. The llabs is also guaranteed to be safe, with no annoying INT64_MIN business since INT64_MIN being a power of 2, is shifted down before being sent to llabs. The binary GCD needs ff_ctzll, an extension of ff_ctz for long long (int64_t). On GCC, this is provided by a built-in. On Microsoft, there is a BitScanForward64 analog of BitScanForward that should work; but I can't confirm. Apparently it is not available on 32 bit builds; so this may or may not work correctly. On Intel, per the documentation there is only an intrinsic for _bit_scan_forward and people have posted on forums regarding _bit_scan_forward64, but often their documentation is woeful. Again, I don't have it, so I can't test. As such, to be safe, for now only the GCC/Clang intrinsic is added, the rest use a compiled version based on the De-Bruijn method of Leiserson et al: http://supertech.csail.mit.edu/papers/debruijn.pdf. Tested with FATE, sample benchmark (x86-64, GCC 5.2.0, Haswell) with a START_TIMER and STOP_TIMER in libavutil/rationsl.c, followed by a make fate. aac-am00_88.err: builtin: 714 decicycles in av_gcd, 4095 runs, 1 skips de-bruijn: 1440 decicycles in av_gcd, 4096 runs, 0 skips previous: 2889 decicycles in av_gcd, 4096 runs, 0 skips Signed-off-by: Ganesh Ajjanagadde <gajjanagadde@gmail.com> Signed-off-by: Michael Niedermayer <michael@niedermayer.cc>
9 years ago
za = ff_ctzll(a);
zb = ff_ctzll(b);
k = FFMIN(za, zb);
u = llabs(a >> za);
v = llabs(b >> zb);
while (u != v) {
if (u > v)
FFSWAP(int64_t, v, u);
v -= u;
v >>= ff_ctzll(v);
}
return (uint64_t)u << k;
}
int64_t av_rescale_rnd(int64_t a, int64_t b, int64_t c, enum AVRounding rnd)
{
int64_t r = 0;
av_assert2(c > 0);
av_assert2(b >=0);
av_assert2((unsigned)(rnd&~AV_ROUND_PASS_MINMAX)<=5 && (rnd&~AV_ROUND_PASS_MINMAX)!=4);
if (c <= 0 || b < 0 || !((unsigned)(rnd&~AV_ROUND_PASS_MINMAX)<=5 && (rnd&~AV_ROUND_PASS_MINMAX)!=4))
return INT64_MIN;
if (rnd & AV_ROUND_PASS_MINMAX) {
if (a == INT64_MIN || a == INT64_MAX)
return a;
rnd -= AV_ROUND_PASS_MINMAX;
}
if (a < 0)
return -(uint64_t)av_rescale_rnd(-FFMAX(a, -INT64_MAX), b, c, rnd ^ ((rnd >> 1) & 1));
if (rnd == AV_ROUND_NEAR_INF)
r = c / 2;
else if (rnd & 1)
r = c - 1;
if (b <= INT_MAX && c <= INT_MAX) {
if (a <= INT_MAX)
return (a * b + r) / c;
else {
int64_t ad = a / c;
int64_t a2 = (a % c * b + r) / c;
if (ad >= INT32_MAX && b && ad > (INT64_MAX - a2) / b)
return INT64_MIN;
return ad * b + a2;
}
} else {
#if 1
uint64_t a0 = a & 0xFFFFFFFF;
uint64_t a1 = a >> 32;
uint64_t b0 = b & 0xFFFFFFFF;
uint64_t b1 = b >> 32;
uint64_t t1 = a0 * b1 + a1 * b0;
uint64_t t1a = t1 << 32;
int i;
a0 = a0 * b0 + t1a;
a1 = a1 * b1 + (t1 >> 32) + (a0 < t1a);
a0 += r;
a1 += a0 < r;
for (i = 63; i >= 0; i--) {
a1 += a1 + ((a0 >> i) & 1);
t1 += t1;
if (c <= a1) {
a1 -= c;
t1++;
}
}
if (t1 > INT64_MAX)
return INT64_MIN;
return t1;
#else
/* reference code doing (a*b + r) / c, requires libavutil/integer.h */
AVInteger ai;
ai = av_mul_i(av_int2i(a), av_int2i(b));
ai = av_add_i(ai, av_int2i(r));
return av_i2int(av_div_i(ai, av_int2i(c)));
#endif
}
}
int64_t av_rescale(int64_t a, int64_t b, int64_t c)
{
return av_rescale_rnd(a, b, c, AV_ROUND_NEAR_INF);
}
int64_t av_rescale_q_rnd(int64_t a, AVRational bq, AVRational cq,
enum AVRounding rnd)
{
int64_t b = bq.num * (int64_t)cq.den;
int64_t c = cq.num * (int64_t)bq.den;
return av_rescale_rnd(a, b, c, rnd);
}
int64_t av_rescale_q(int64_t a, AVRational bq, AVRational cq)
{
return av_rescale_q_rnd(a, bq, cq, AV_ROUND_NEAR_INF);
}
int av_compare_ts(int64_t ts_a, AVRational tb_a, int64_t ts_b, AVRational tb_b)
{
int64_t a = tb_a.num * (int64_t)tb_b.den;
int64_t b = tb_b.num * (int64_t)tb_a.den;
if ((FFABS64U(ts_a)|a|FFABS64U(ts_b)|b) <= INT_MAX)
return (ts_a*a > ts_b*b) - (ts_a*a < ts_b*b);
if (av_rescale_rnd(ts_a, a, b, AV_ROUND_DOWN) < ts_b)
return -1;
if (av_rescale_rnd(ts_b, b, a, AV_ROUND_DOWN) < ts_a)
return 1;
return 0;
}
int64_t av_compare_mod(uint64_t a, uint64_t b, uint64_t mod)
{
int64_t c = (a - b) & (mod - 1);
if (c > (mod >> 1))
c -= mod;
return c;
}
int64_t av_rescale_delta(AVRational in_tb, int64_t in_ts, AVRational fs_tb, int duration, int64_t *last, AVRational out_tb){
int64_t a, b, this;
av_assert0(in_ts != AV_NOPTS_VALUE);
av_assert0(duration >= 0);
if (*last == AV_NOPTS_VALUE || !duration || in_tb.num*(int64_t)out_tb.den <= out_tb.num*(int64_t)in_tb.den) {
simple_round:
*last = av_rescale_q(in_ts, in_tb, fs_tb) + duration;
return av_rescale_q(in_ts, in_tb, out_tb);
}
a = av_rescale_q_rnd(2*in_ts-1, in_tb, fs_tb, AV_ROUND_DOWN) >>1;
b = (av_rescale_q_rnd(2*in_ts+1, in_tb, fs_tb, AV_ROUND_UP )+1)>>1;
if (*last < 2*a - b || *last > 2*b - a)
goto simple_round;
this = av_clip64(*last, a, b);
*last = this + duration;
return av_rescale_q(this, fs_tb, out_tb);
}
int64_t av_add_stable(AVRational ts_tb, int64_t ts, AVRational inc_tb, int64_t inc)
{
int64_t m, d;
if (inc != 1)
inc_tb = av_mul_q(inc_tb, (AVRational) {inc, 1});
m = inc_tb.num * (int64_t)ts_tb.den;
d = inc_tb.den * (int64_t)ts_tb.num;
if (m % d == 0 && ts <= INT64_MAX - m / d)
return ts + m / d;
if (m < d)
return ts;
{
int64_t old = av_rescale_q(ts, ts_tb, inc_tb);
int64_t old_ts = av_rescale_q(old, inc_tb, ts_tb);
if (old == INT64_MAX || old == AV_NOPTS_VALUE || old_ts == AV_NOPTS_VALUE)
return ts;
return av_sat_add64(av_rescale_q(old + 1, inc_tb, ts_tb), ts - old_ts);
}
}