/*
* H .26 L / H .264 / AVC / JVT / 14496 - 10 / . . . encoder / decoder
* Copyright ( c ) 2003 Michael Niedermayer < michaelni @ gmx . at >
*
* This file is part of FFmpeg .
*
* FFmpeg is free software ; you can redistribute it and / or
* modify it under the terms of the GNU Lesser General Public
* License as published by the Free Software Foundation ; either
* version 2.1 of the License , or ( at your option ) any later version .
*
* FFmpeg is distributed in the hope that it will be useful ,
* but WITHOUT ANY WARRANTY ; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE . See the GNU
* Lesser General Public License for more details .
*
* You should have received a copy of the GNU Lesser General Public
* License along with FFmpeg ; if not , write to the Free Software
* Foundation , Inc . , 51 Franklin Street , Fifth Floor , Boston , MA 02110 - 1301 USA
*
*/
/**
* @ file cabac . h
* Context Adaptive Binary Arithmetic Coder .
*/
//#undef NDEBUG
# include <assert.h>
# ifdef ARCH_X86
# include "x86_cpu.h"
# endif
/* Number of bitstream bits loaded per decoder refill: refill() and refill2()
 * read two bytes when this is 16 and a single byte otherwise. */
# define CABAC_BITS 16
/* Mask covering the low CABAC_BITS bits of CABACContext.low; the C decoder
 * paths refill whenever (low & CABAC_MASK) is zero. */
# define CABAC_MASK ((1<<CABAC_BITS)-1)
/* Selects the branchless variants inside get_cabac_inline(). It is only
 * tested with #ifndef there, so the value 1 itself is not used. */
# define BRANCHLESS_CABAC_DECODER 1
/* Uncomment to select the inline-asm variants in renorm_cabac_decoder_once()
 * instead of its C implementation. */
//#define ARCH_X86_DISABLED 1
/**
 * State shared by the CABAC encoder and decoder routines of this file.
 *
 * The inline assembly in this file accesses low, range and the bytestream
 * pointers through the hard-coded byte offsets LOW/RANGE/BYTESTART/BYTE/BYTEEND
 * defined in get_cabac_inline(). The order and types of the members up to
 * bytestream_end must therefore not change without updating those offsets.
 * NOTE(review): those offsets look like they assume STRICT_LIMITS is not
 * defined (symCount would shift the pointers) -- confirm.
 */
typedef struct CABACContext {
int low ; ///< arithmetic coder "low" register (encoder and decoder)
int range ; ///< arithmetic coder "range" register (encoder and decoder)
int outstanding_count ; ///< encoder: bits deferred by the renormalization, flushed by put_cabac_bit()
# ifdef STRICT_LIMITS
int symCount ; ///< encoder: incremented once per coded symbol
# endif
const uint8_t * bytestream_start ; ///< decoder: base used by get_cabac_terminate() to compute the byte count
const uint8_t * bytestream ; ///< decoder: current read position, advanced by the refill code
const uint8_t * bytestream_end ; ///< not read by the code in this file; presumably the end of the input -- confirm
PutBitContext pb ; ///< encoder: bit writer receiving the output bits
} CABACContext ;
/* Lookup tables defined outside this header. */
/* Combined state transition table; the branchless decoder indexes it as
 * (ff_h264_mlps_state + 128)[s], where s may have been XORed with an
 * all-ones mask and can therefore be negative. */
extern uint8_t ff_h264_mlps_state [ 4 * 64 ] ;
/* Indexed in this file as [2 * (range & 0xC0) + state]. */
extern uint8_t ff_h264_lps_range [ 4 * 2 * 64 ] ; ///< rangeTabLPS
/* Next state after coding the more probable symbol, indexed by state. */
extern uint8_t ff_h264_mps_state [ 2 * 64 ] ; ///< transIdxMPS
/* Next state after coding the less probable symbol, indexed by state. */
extern uint8_t ff_h264_lps_state [ 2 * 64 ] ; ///< transIdxLPS
/* Shift counts used for renormalization; indexed by a range value in
 * get_cabac_inline() and by a shifted bit mask in refill2(). */
extern const uint8_t ff_h264_norm_shift [ 512 ] ;
/* The three functions below are defined outside this header. */
/* NOTE(review): presumably sets up c->pb to write into buf[0..buf_size) and
 * resets the encoder registers -- confirm against the definition. */
void ff_init_cabac_encoder ( CABACContext * c , uint8_t * buf , int buf_size ) ;
/* NOTE(review): presumably points the bytestream members at buf and loads the
 * initial low/range values -- confirm against the definition. */
void ff_init_cabac_decoder ( CABACContext * c , const uint8_t * buf , int buf_size ) ;
/* NOTE(review): presumably fills the ff_h264_* state tables declared above --
 * confirm against the definition. */
void ff_init_cabac_states ( CABACContext * c ) ;
/**
 * Emit one resolved bit, then flush every deferred ("outstanding") bit.
 *
 * @param c encoder context; outstanding_count is zero on return
 * @param b bit value to write (0 or 1); each deferred bit is written as 1 - b
 */
static inline void put_cabac_bit(CABACContext *c, int b)
{
    put_bits(&c->pb, 1, b);

    /* The deferred bits all carry the complement of the bit just written. */
    while (c->outstanding_count) {
        put_bits(&c->pb, 1, 1 - b);
        c->outstanding_count--;
    }
}
/**
 * Encoder renormalization: double range and low until range is at least
 * 0x100, emitting or deferring one output bit per doubling.
 *
 * @param c encoder context
 */
static inline void renorm_cabac_encoder(CABACContext *c)
{
    while (c->range < 0x100) {
        //FIXME optimize
        if (c->low >= 0x200) {
            /* Top of the interval: the bit is definitely 1. */
            put_cabac_bit(c, 1);
            c->low -= 0x200;
        } else if (c->low >= 0x100) {
            /* Undecided: defer the bit until the next resolved one. */
            c->outstanding_count++;
            c->low -= 0x100;
        } else {
            /* Bottom of the interval: the bit is definitely 0. */
            put_cabac_bit(c, 0);
        }

        c->range *= 2;
        c->low   *= 2;
    }
}
/**
 * Encode one bin with an adaptive context.
 *
 * @param c     encoder context
 * @param state context state byte; its lowest bit is compared against bit to
 *              choose between the MPS and LPS paths, and it is replaced by
 *              the next state
 * @param bit   bin value to encode; must be 0 or 1 for the comparison with
 *              the state's lowest bit to be meaningful
 */
static void put_cabac(CABACContext *c, uint8_t *const state, int bit)
{
    const int cur       = *state;
    const int range_lps = ff_h264_lps_range[2 * (c->range & 0xC0) + cur];

    if (bit != (cur & 1)) {
        /* Less probable symbol: move low to the top sub-interval. */
        c->low  += c->range - range_lps;
        c->range = range_lps;
        *state   = ff_h264_lps_state[cur];
    } else {
        /* More probable symbol: keep low, shrink the range. */
        c->range -= range_lps;
        *state    = ff_h264_mps_state[cur];
    }

    renorm_cabac_encoder(c);

#ifdef STRICT_LIMITS
    c->symCount++;
#endif
}
/**
 * Encode one bin with a fixed (non-adaptive) LPS range.
 *
 * @param c        encoder context
 * @param RangeLPS sub-range assigned to a non-zero bit; must be smaller than
 *                 c->range (checked with assert)
 * @param bit      0 selects the lower sub-interval, non-zero the RangeLPS one
 */
static void put_cabac_static(CABACContext *c, int RangeLPS, int bit)
{
    assert(c->range > RangeLPS);

    if (bit) {
        c->low  += c->range - RangeLPS;
        c->range = RangeLPS;
    } else {
        c->range -= RangeLPS;
    }

    renorm_cabac_encoder(c);

#ifdef STRICT_LIMITS
    c->symCount++;
#endif
}
/**
 * Encode one equiprobable (bypass) bin; the range is left unchanged.
 *
 * @param c   encoder context
 * @param bit 0 -> write zero bit, != 0 write one bit
 */
static void put_cabac_bypass(CABACContext *c, int bit)
{
    c->low *= 2;
    if (bit)
        c->low += c->range;

    //FIXME optimize
    if (c->low >= 0x400) {
        put_cabac_bit(c, 1);
        c->low -= 0x400;
    } else if (c->low >= 0x200) {
        /* Undecided: defer the bit until the next resolved one. */
        c->outstanding_count++;
        c->low -= 0x200;
    } else {
        put_cabac_bit(c, 0);
    }

#ifdef STRICT_LIMITS
    c->symCount++;
#endif
}
/**
 * Encode the terminate bin; when bit is set, also write the final bits and
 * flush the bit writer.
 *
 * @param c   encoder context
 * @param bit non-zero to terminate the arithmetic codeword
 * @return the number of bytes written, i.e. the bit writer's bit count
 *         rounded up to whole bytes
 */
static int put_cabac_terminate(CABACContext *c, int bit)
{
    c->range -= 2;

    if (bit) {
        c->low  += c->range;
        c->range = 2;
        renorm_cabac_encoder(c);

        assert(c->low <= 0x1FF);
        put_cabac_bit(c, c->low >> 9);
        /* Two more bits of low, with the last one forced to 1. */
        put_bits(&c->pb, 2, ((c->low >> 7) & 3) | 1);

        flush_put_bits(&c->pb); //FIXME FIXME FIXME XXX wrong
    } else {
        renorm_cabac_encoder(c);
    }

#ifdef STRICT_LIMITS
    c->symCount++;
#endif

    return (put_bits_count(&c->pb) + 7) >> 3;
}
/**
 * put (truncated) unary binarization.
 *
 * Writes v one-bins followed, unless the code is truncated and v == max,
 * by a terminating zero-bin.
 *
 * @param c         encoder context
 * @param state     first context state; the state pointer advances by one
 *                  after each of the first max_index bins and then stays put
 * @param v         value to encode; must not exceed max (checked with assert)
 * @param max       largest encodable value
 * @param max_index number of times the state pointer may advance
 * @param truncated non-zero to omit the terminating zero-bin when v == max
 */
static void put_cabac_u(CABACContext *c, uint8_t *state, int v, int max, int max_index, int truncated)
{
    int i;

    assert(v <= max);

    for (i = 0; i < v; i++) {
        put_cabac(c, state, 1);
        if (i < max_index)
            state++;
    }

    if (truncated == 0 || v < max)
        put_cabac(c, state, 0);
}
/**
 * put unary exp golomb k-th order binarization.
 *
 * A zero value is a single zero-bin. Otherwise up to max context-coded
 * one-bins are written (unary prefix); magnitudes of max or more continue
 * with a bypass-coded Exp-Golomb suffix of order k. For signed values the
 * sign follows as one bypass bin.
 *
 * @param c         encoder context
 * @param state     first context state; advances by one after each of the
 *                  first max_index prefix bins
 * @param v         value to encode
 * @param max       length of the unary prefix at which the suffix starts
 * @param is_signed non-zero to encode |v| followed by a sign bin
 * @param k         order of the Exp-Golomb suffix
 * @param max_index number of times the state pointer may advance
 */
static void put_cabac_ueg(CABACContext *c, uint8_t *state, int v, int max, int is_signed, int k, int max_index)
{
    int i, sign, unary_len;

    if (v == 0) {
        put_cabac(c, state, 0);
        return;
    }

    sign = v < 0;
    if (is_signed)
        v = FFABS(v);

    /* Context-coded unary prefix: min(v, max) one-bins. */
    unary_len = v < max ? v : max;
    for (i = 0; i < unary_len; i++) {
        put_cabac(c, state, 1);
        if (i < max_index)
            state++;
    }

    if (v < max) {
        /* Short value: the prefix is closed by a zero-bin. */
        put_cabac(c, state, 0);
    } else {
        /* Escape: bypass-coded Exp-Golomb suffix of the remainder. */
        int m = 1 << k;

        v -= max;
        while (v >= m) { //FIXME optimize
            put_cabac_bypass(c, 1);
            v -= m;
            m += m;
        }
        put_cabac_bypass(c, 0);
        while (m >>= 1)
            put_cabac_bypass(c, v & m);
    }

    if (is_signed)
        put_cabac_bypass(c, sign);
}
/**
 * Load the next CABAC_BITS bits of input into the bottom of low and advance
 * the read pointer by CABAC_BITS / 8 bytes.
 *
 * @param c decoder context
 */
static void refill(CABACContext *c)
{
    const uint8_t *src = c->bytestream;
#if CABAC_BITS == 16
    const int fresh = (src[0] << 9) + (src[1] << 1);
#else
    const int fresh = src[0] << 1;
#endif

    c->low       += fresh;
    c->low       -= CABAC_MASK;
    c->bytestream = src + CABAC_BITS / 8;
}
/**
 * Variant of refill() used after a multi-bit shift of low: the new input
 * bits are shifted left by an amount derived from the lowest set bit of low
 * before being added.
 *
 * @param c decoder context
 */
static void refill2(CABACContext *c)
{
    /* low ^ (low - 1) sets every bit up to and including the lowest set bit. */
    const int below_lowest = c->low ^ (c->low - 1);
    const int shift        = 7 - ff_h264_norm_shift[below_lowest >> (CABAC_BITS - 1)];
    int fresh              = -CABAC_MASK;

#if CABAC_BITS == 16
    fresh += (c->bytestream[0] << 9) + (c->bytestream[1] << 1);
#else
    fresh += c->bytestream[0] << 1;
#endif

    c->low        += fresh << shift;
    c->bytestream += CABAC_BITS / 8;
}
/**
 * Decoder renormalization: double range and low until range is at least
 * 0x100, refilling whenever the low CABAC_BITS bits of low become zero.
 *
 * @param c decoder context
 */
static inline void renorm_cabac_decoder(CABACContext *c)
{
    while (c->range < 0x100) {
        c->range *= 2;
        c->low   *= 2;
        if ((c->low & CABAC_MASK) == 0)
            refill(c);
    }
}
/**
 * Decoder renormalization by at most one bit: when range is below 0x100,
 * range and low are both doubled; afterwards low is refilled if its low
 * CABAC_BITS bits are zero.
 *
 * The inline-asm variants are only compiled when ARCH_X86_DISABLED is
 * defined; among them the "# elif 1" one is the selected variant. Otherwise
 * the branchless C code at the end is used.
 *
 * @param c decoder context
 */
static inline void renorm_cabac_decoder_once ( CABACContext * c ) {
# ifdef ARCH_X86_DISABLED
int temp ;
#if 0
//P3:683 athlon:475
asm (
" lea -0x100(%0), %2 \n \t "
" shr $31, %2 \n \t " //FIXME 31->63 for x86-64
" shl %%cl, %0 \n \t "
" shl %%cl, %1 \n \t "
: " +r " ( c - > range ) , " +r " ( c - > low ) , " +c " ( temp )
) ;
# elif 0
//P3:680 athlon:474
asm (
" cmp $0x100, %0 \n \t "
" setb %%cl \n \t " //FIXME 31->63 for x86-64
" shl %%cl, %0 \n \t "
" shl %%cl, %1 \n \t "
: " +r " ( c - > range ) , " +r " ( c - > low ) , " +c " ( temp )
) ;
# elif 1
int temp2 ;
//P3:665 athlon:517
// Selected asm variant: cdq turns the sign of (range - 0x100) into an
// all-ones/zero mask in edx, which is then used to conditionally add range
// and low to themselves.
asm (
" lea -0x100(%0), %%eax \n \t "
" cdq \n \t "
" mov %0, %%eax \n \t "
" and %%edx, %0 \n \t "
" and %1, %%edx \n \t "
" add %%eax, %0 \n \t "
" add %%edx, %1 \n \t "
: " +r " ( c - > range ) , " +r " ( c - > low ) , " +a " ( temp ) , " +d " ( temp2 )
) ;
# elif 0
int temp2 ;
//P3:673 athlon:509
asm (
" cmp $0x100, %0 \n \t "
" sbb %%edx, %%edx \n \t "
" mov %0, %%eax \n \t "
" and %%edx, %0 \n \t "
" and %1, %%edx \n \t "
" add %%eax, %0 \n \t "
" add %%edx, %1 \n \t "
: " +r " ( c - > range ) , " +r " ( c - > low ) , " +a " ( temp ) , " +d " ( temp2 )
) ;
# else
int temp2 ;
//P3:677 athlon:511
asm (
" cmp $0x100, %0 \n \t "
" lea (%0, %0), %%eax \n \t "
" lea (%1, %1), %%edx \n \t "
" cmovb %%eax, %0 \n \t "
" cmovb %%edx, %1 \n \t "
: " +r " ( c - > range ) , " +r " ( c - > low ) , " +a " ( temp ) , " +d " ( temp2 )
) ;
# endif
# else
//P3:675 athlon:476
// shift is 1 when range - 0x100 is negative (sign bit of the 32-bit
// difference), 0 otherwise.
int shift = ( uint32_t ) ( c - > range - 0x100 ) > > 31 ;
c - > range < < = shift ;
c - > low < < = shift ;
# endif
if ( ! ( c - > low & CABAC_MASK ) )
refill ( c ) ;
}
/**
 * Decode one context-coded bin and update the context state.
 *
 * One of four implementations is selected at preprocessing time:
 *  - x86 inline asm with branches (ARCH_X86 defined, not PIC with GCC, and
 *    BRANCHLESS_CABAC_DECODER undefined),
 *  - x86 inline asm built from the BRANCHLESS_GET_CABAC macro,
 *  - portable C with a branch on MPS/LPS,
 *  - portable branchless C.
 *
 * The LOW/RANGE/BYTESTART/BYTE/BYTEEND macros defined at the top are byte
 * offsets of CABACContext members, spelled as strings for use inside asm
 * templates. They, and BRANCHLESS_GET_CABAC, stay defined after this
 * function and are reused by later asm blocks in this file.
 *
 * @param c     decoder context; low, range and bytestream are updated
 * @param state context state byte; read and replaced by the next state
 * @return the decoded bin, 0 or 1
 */
static int always_inline get_cabac_inline ( CABACContext * c , uint8_t * const state ) {
//FIXME gcc generates duplicate load/stores for c->low and c->range
# define LOW "0"
# define RANGE "4"
# ifdef ARCH_X86_64
# define BYTESTART "16"
# define BYTE "24"
# define BYTEEND "32"
# else
# define BYTESTART "12"
# define BYTE "16"
# define BYTEEND "20"
# endif
# if defined(ARCH_X86) && !(defined(PIC) && defined(__GNUC__))
int bit ;
# ifndef BRANCHLESS_CABAC_DECODER
// Branching asm variant: label 1 is the LPS path, label 2 the common exit
// that stores range and low back into the context.
asm volatile (
" movzbl (%1), %0 \n \t "
" movl " RANGE " (%2), %%ebx \n \t "
" movl " RANGE " (%2), %%edx \n \t "
" andl $0xC0, %%ebx \n \t "
" movzbl " MANGLE ( ff_h264_lps_range ) " (%0, %%ebx, 2), %%esi \n \t "
" movl " LOW " (%2), %%ebx \n \t "
//eax:state ebx:low, edx:range, esi:RangeLPS
" subl %%esi, %%edx \n \t "
" movl %%edx, %%ecx \n \t "
" shll $17, %%ecx \n \t "
" cmpl %%ecx, %%ebx \n \t "
" ja 1f \n \t "
# if 1
//athlon:4067 P3:4110
" lea -0x100(%%edx), %%ecx \n \t "
" shr $31, %%ecx \n \t "
" shl %%cl, %%edx \n \t "
" shl %%cl, %%ebx \n \t "
# else
//athlon:4057 P3:4130
" cmp $0x100, %%edx \n \t " //FIXME avoidable
" setb %%cl \n \t "
" shl %%cl, %%edx \n \t "
" shl %%cl, %%ebx \n \t "
# endif
" movzbl " MANGLE ( ff_h264_mps_state ) " (%0), %%ecx \n \t "
" movb %%cl, (%1) \n \t "
//eax:state ebx:low, edx:range, esi:RangeLPS
" test %%bx, %%bx \n \t "
" jnz 2f \n \t "
" mov " BYTE " (%2), %% " REG_S " \n \t "
" subl $0xFFFF, %%ebx \n \t "
" movzwl (%% " REG_S " ), %%ecx \n \t "
" bswap %%ecx \n \t "
" shrl $15, %%ecx \n \t "
" add $2, %% " REG_S " \n \t "
" addl %%ecx, %%ebx \n \t "
" mov %% " REG_S " , " BYTE " (%2) \n \t "
" jmp 2f \n \t "
" 1: \n \t "
//eax:state ebx:low, edx:range, esi:RangeLPS
" subl %%ecx, %%ebx \n \t "
" movl %%esi, %%edx \n \t "
" movzbl " MANGLE ( ff_h264_norm_shift ) " (%%esi), %%ecx \n \t "
" shll %%cl, %%ebx \n \t "
" shll %%cl, %%edx \n \t "
" movzbl " MANGLE ( ff_h264_lps_state ) " (%0), %%ecx \n \t "
" movb %%cl, (%1) \n \t "
" add $1, %0 \n \t "
" test %%bx, %%bx \n \t "
" jnz 2f \n \t "
" mov " BYTE " (%2), %% " REG_c " \n \t "
" movzwl (%% " REG_c " ), %%esi \n \t "
" bswap %%esi \n \t "
" shrl $15, %%esi \n \t "
" subl $0xFFFF, %%esi \n \t "
" add $2, %% " REG_c " \n \t "
" mov %% " REG_c " , " BYTE " (%2) \n \t "
" leal -1(%%ebx), %%ecx \n \t "
" xorl %%ebx, %%ecx \n \t "
" shrl $15, %%ecx \n \t "
" movzbl " MANGLE ( ff_h264_norm_shift ) " (%%ecx), %%ecx \n \t "
" neg %%ecx \n \t "
" add $7, %%ecx \n \t "
" shll %%cl , %%esi \n \t "
" addl %%esi, %%ebx \n \t "
" 2: \n \t "
" movl %%edx, " RANGE " (%2) \n \t "
" movl %%ebx, " LOW " (%2) \n \t "
: " =&a " ( bit ) //FIXME this is fragile gcc either runs out of registers or misscompiles it (for example if "+a"(bit) or "+m"(*state) is used
: " r " ( state ) , " r " ( c )
: " % " REG_c , " %ebx " , " %edx " , " % " REG_S , " memory "
) ;
// Only the lowest bit of the (possibly incremented) state is the bin value.
bit & = 1 ;
# else /* BRANCHLESS_CABAC_DECODER */
// BRANCHLESS_GET_CABAC_UPDATE applies the MPS/LPS decision to low, range and
// ret without a jump; two flavours exist depending on CMOV_IS_FAST.
# if defined CMOV_IS_FAST
# define BRANCHLESS_GET_CABAC_UPDATE(ret, cabac, statep, low, lowword, range, tmp, tmpbyte)\
" mov " tmp " , %%ecx \n \t " \
" shl $17 , " tmp " \n \t " \
" cmp " low " , " tmp " \n \t " \
" cmova %%ecx , " range " \n \t " \
" sbb %%ecx , %%ecx \n \t " \
" and %%ecx , " tmp " \n \t " \
" sub " tmp " , " low " \n \t " \
" xor %%ecx , " ret " \n \t "
# else /* CMOV_IS_FAST */
# define BRANCHLESS_GET_CABAC_UPDATE(ret, cabac, statep, low, lowword, range, tmp, tmpbyte)\
" mov " tmp " , %%ecx \n \t " \
" shl $17 , " tmp " \n \t " \
" sub " low " , " tmp " \n \t " \
" sar $31 , " tmp " \n \t " /*lps_mask*/ \
" sub %%ecx , " range " \n \t " /*RangeLPS - range*/ \
" and " tmp " , " range " \n \t " /*(RangeLPS - range)&lps_mask*/ \
" add %%ecx , " range " \n \t " /*new range*/ \
" shl $17 , %%ecx \n \t " \
" and " tmp " , %%ecx \n \t " \
" sub %%ecx , " low " \n \t " \
" xor " tmp " , " ret " \n \t "
# endif /* CMOV_IS_FAST */
// BRANCHLESS_GET_CABAC is the full asm template for decoding one bin; its
// arguments are operand strings. It uses the local label 1 and clobbers ecx.
# define BRANCHLESS_GET_CABAC(ret, cabac, statep, low, lowword, range, tmp, tmpbyte)\
" movzbl " statep " , " ret " \n \t " \
" mov " range " , " tmp " \n \t " \
" and $0xC0 , " range " \n \t " \
" movzbl " MANGLE ( ff_h264_lps_range ) " ( " ret " , " range " , 2), " range " \n \t " \
" sub " range " , " tmp " \n \t " \
BRANCHLESS_GET_CABAC_UPDATE ( ret , cabac , statep , low , lowword , range , tmp , tmpbyte ) \
" movzbl " MANGLE ( ff_h264_norm_shift ) " ( " range " ), %%ecx \n \t " \
" shl %%cl , " range " \n \t " \
" movzbl " MANGLE ( ff_h264_mlps_state ) " +128( " ret " ), " tmp " \n \t " \
" mov " tmpbyte " , " statep " \n \t " \
" shl %%cl , " low " \n \t " \
" test " lowword " , " lowword " \n \t " \
" jnz 1f \n \t " \
" mov " BYTE " ( " cabac " ), %% " REG_c " \n \t " \
" movzwl (%% " REG_c " ) , " tmp " \n \t " \
" bswap " tmp " \n \t " \
" shr $15 , " tmp " \n \t " \
" sub $0xFFFF , " tmp " \n \t " \
" add $2 , %% " REG_c " \n \t " \
" mov %% " REG_c " , " BYTE " ( " cabac " ) \n \t " \
" lea -1( " low " ) , %%ecx \n \t " \
" xor " low " , %%ecx \n \t " \
" shr $15 , %%ecx \n \t " \
" movzbl " MANGLE ( ff_h264_norm_shift ) " (%%ecx), %%ecx \n \t " \
" neg %%ecx \n \t " \
" add $7 , %%ecx \n \t " \
" shl %%cl , " tmp " \n \t " \
" add " tmp " , " low " \n \t " \
" 1: \n \t "
asm volatile (
" movl " RANGE " (%2), %%esi \n \t "
" movl " LOW " (%2), %%ebx \n \t "
BRANCHLESS_GET_CABAC ( " %0 " , " %2 " , " (%1) " , " %%ebx " , " %%bx " , " %%esi " , " %%edx " , " %%dl " )
" movl %%esi, " RANGE " (%2) \n \t "
" movl %%ebx, " LOW " (%2) \n \t "
: " =&a " ( bit )
: " r " ( state ) , " r " ( c )
: " % " REG_c , " %ebx " , " %edx " , " %esi " , " memory "
) ;
bit & = 1 ;
# endif /* BRANCHLESS_CABAC_DECODER */
# else /* defined(ARCH_X86) && !(defined(PIC) && defined(__GNUC__)) */
// Portable C implementation.
int s = * state ;
int RangeLPS = ff_h264_lps_range [ 2 * ( c - > range & 0xC0 ) + s ] ;
int bit , lps_mask attribute_unused ;
c - > range - = RangeLPS ;
# ifndef BRANCHLESS_CABAC_DECODER
if ( c - > low < ( c - > range < < 17 ) ) {
// MPS path: the bin is the state's lowest bit; at most one bit of
// renormalization is performed.
bit = s & 1 ;
* state = ff_h264_mps_state [ s ] ;
renorm_cabac_decoder_once ( c ) ;
} else {
// LPS path: bit temporarily holds the renormalization shift for RangeLPS.
bit = ff_h264_norm_shift [ RangeLPS ] ;
c - > low - = ( c - > range < < 17 ) ;
* state = ff_h264_lps_state [ s ] ;
c - > range = RangeLPS < < bit ;
c - > low < < = bit ;
// The decoded bin is the complement of the state's lowest bit.
bit = ( s & 1 ) ^ 1 ;
if ( ! ( c - > low & 0xFFFF ) ) {
refill2 ( c ) ;
}
}
# else /* BRANCHLESS_CABAC_DECODER */
// lps_mask becomes all ones when low exceeds range<<17 (LPS) and 0 otherwise;
// this relies on >> of a negative int being an arithmetic shift.
lps_mask = ( ( c - > range < < 17 ) - c - > low ) > > 31 ;
c - > low - = ( c - > range < < 17 ) & lps_mask ;
c - > range + = ( RangeLPS - c - > range ) & lps_mask ;
// On the LPS path s is complemented, which both flips the returned bin and
// selects the LPS half of the combined table (negative index from +128).
s ^ = lps_mask ;
* state = ( ff_h264_mlps_state + 128 ) [ s ] ;
bit = s & 1 ;
// lps_mask is reused as the renormalization shift count from here on.
lps_mask = ff_h264_norm_shift [ c - > range ] ;
c - > range < < = lps_mask ;
c - > low < < = lps_mask ;
if ( ! ( c - > low & CABAC_MASK ) )
refill2 ( c ) ;
# endif /* BRANCHLESS_CABAC_DECODER */
# endif /* defined(ARCH_X86) && !(defined(PIC) && defined(__GNUC__)) */
return bit ;
}
/**
 * Out-of-line wrapper around get_cabac_inline(); the noinline attribute
 * keeps the decoder body from being duplicated at every call site.
 *
 * @param c     decoder context
 * @param state context state byte, updated in place
 * @return the decoded bin, 0 or 1
 */
static int __attribute ( ( noinline ) ) get_cabac_noinline ( CABACContext * c , uint8_t * const state ) {
return get_cabac_inline ( c , state ) ;
}
/**
 * Decode one context-coded bin; forwards to get_cabac_inline() and leaves
 * the inlining decision to the compiler.
 *
 * @param c     decoder context
 * @param state context state byte, updated in place
 * @return the decoded bin, 0 or 1
 */
static int get_cabac ( CABACContext * c , uint8_t * const state ) {
return get_cabac_inline ( c , state ) ;
}
/**
 * Decode one equiprobable (bypass) bin; the range is left unchanged.
 *
 * low is doubled (refilling from the bytestream when its low CABAC_BITS
 * bits become zero) and compared against range << 17.
 *
 * @param c decoder context; low and possibly bytestream are updated
 * @return the decoded bin, 0 or 1
 */
static int get_cabac_bypass(CABACContext *c)
{
    int range;

    c->low += c->low;
    if (!(c->low & CABAC_MASK))
        refill(c);

    range = c->range << 17;
    if (c->low < range)
        return 0;

    /* Bin is 1: remove the consumed sub-interval from low. */
    c->low -= range;
    return 1;
}
/**
 * Decode one bypass bin and apply it to val as a sign.
 *
 * As the C implementation shows, the result is -val when low ends up below
 * range << 17 (the case in which get_cabac_bypass() returns 0) and val
 * otherwise. The x86 asm path is used when ARCH_X86 is defined and the build
 * is not PIC with GCC.
 *
 * @param c   decoder context; low and possibly bytestream are updated
 * @param val magnitude to be signed
 * @return val or -val depending on the decoded bin
 */
static always_inline int get_cabac_bypass_sign ( CABACContext * c , int val ) {
# if defined(ARCH_X86) && !(defined(PIC) && defined(__GNUC__))
asm volatile (
" movl " RANGE " (%1), %%ebx \n \t "
" movl " LOW " (%1), %%eax \n \t "
" shl $17, %%ebx \n \t "
" add %%eax, %%eax \n \t "
" sub %%ebx, %%eax \n \t "
" cdq \n \t "
" and %%edx, %%ebx \n \t "
" add %%ebx, %%eax \n \t "
" xor %%edx, %%ecx \n \t "
" sub %%edx, %%ecx \n \t "
" test %%ax, %%ax \n \t "
" jnz 1f \n \t "
" mov " BYTE " (%1), %% " REG_b " \n \t "
" subl $0xFFFF, %%eax \n \t "
" movzwl (%% " REG_b " ), %%edx \n \t "
" bswap %%edx \n \t "
" shrl $15, %%edx \n \t "
" add $2, %% " REG_b " \n \t "
" addl %%edx, %%eax \n \t "
" mov %% " REG_b " , " BYTE " (%1) \n \t "
" 1: \n \t "
" movl %%eax, " LOW " (%1) \n \t "
: " +c " ( val )
: " r " ( c )
: " %eax " , " % " REG_b , " %edx " , " memory "
) ;
return val ;
# else
int range , mask ;
c - > low + = c - > low ;
if ( ! ( c - > low & CABAC_MASK ) )
refill ( c ) ;
range = c - > range < < 17 ;
c - > low - = range ;
// mask is all ones when the subtraction made low negative, 0 otherwise
// (relies on >> of a negative int being an arithmetic shift).
mask = c - > low > > 31 ;
// Undo the subtraction only in the negative case.
range & = mask ;
c - > low + = range ;
// Conditional negation: yields -val when mask is all ones, val when it is 0.
return ( val ^ mask ) - mask ;
# endif
}
//FIXME the x86 code from this file should be moved into i386/h264 or cabac something.c/h (note: I'll kill you if you move my code away from under my fingers before I am finished with it!)
//FIXME use some macros to avoid duplicating get_cabac (can't be done yet as that would make optimization work hard)
# if defined(ARCH_X86) && !(defined(PIC) && defined(__GNUC__))
/**
 * x86 asm loop that decodes a run of significance bins and records the
 * positions at which a significant bin was found.
 *
 * For each context byte from significant_coeff_ctx_base up to (but not
 * including) base + max_coeff - 1, one bin is decoded with that context. If
 * it is set, a second bin is decoded with the context byte 61 bytes further
 * on, and the current position (offset from the base) is stored through
 * index. A set second bin ends the loop; otherwise index advances by one
 * int. If the loop runs to the end, the final position is stored as well.
 *
 * Pointers are cast to int to form the offsets; the asm only ever stores the
 * low 32 bits of the resulting differences.
 *
 * @param c                         decoder context; low, range and bytestream are updated
 * @param max_coeff                 number of coefficient positions
 * @param significant_coeff_ctx_base first context state byte
 * @param index                     output array receiving the recorded positions
 * @return the number of entries written to index
 */
static int decode_significance_x86 ( CABACContext * c , int max_coeff , uint8_t * significant_coeff_ctx_base , int * index ) {
void * end = significant_coeff_ctx_base + max_coeff - 1 ;
// Negated base address: adding the current context pointer yields the position.
int minusstart = - ( int ) significant_coeff_ctx_base ;
// Used at the end to turn the last written index address into an entry count.
int minusindex = 4 - ( int ) index ;
int coeff_count ;
asm volatile (
" movl " RANGE " (%3), %%esi \n \t "
" movl " LOW " (%3), %%ebx \n \t "
" 2: \n \t "
BRANCHLESS_GET_CABAC ( " %%edx " , " %3 " , " (%1) " , " %%ebx " , " %%bx " , " %%esi " , " %%eax " , " %%al " )
" test $1, %%edx \n \t "
" jz 3f \n \t "
BRANCHLESS_GET_CABAC ( " %%edx " , " %3 " , " 61(%1) " , " %%ebx " , " %%bx " , " %%esi " , " %%eax " , " %%al " )
" mov %2, %% " REG_a " \n \t "
" movl %4, %%ecx \n \t "
" add %1, %% " REG_c " \n \t "
" movl %%ecx, (%% " REG_a " ) \n \t "
" test $1, %%edx \n \t "
" jnz 4f \n \t "
" add $4, %% " REG_a " \n \t "
" mov %% " REG_a " , %2 \n \t "
" 3: \n \t "
" add $1, %1 \n \t "
" cmp %5, %1 \n \t "
" jb 2b \n \t "
" mov %2, %% " REG_a " \n \t "
" movl %4, %%ecx \n \t "
" add %1, %% " REG_c " \n \t "
" movl %%ecx, (%% " REG_a " ) \n \t "
" 4: \n \t "
" add %6, %%eax \n \t "
" shr $2, %%eax \n \t "
" movl %%esi, " RANGE " (%3) \n \t "
" movl %%ebx, " LOW " (%3) \n \t "
: " =&a " ( coeff_count ) , " +r " ( significant_coeff_ctx_base ) , " +m " ( index ) \
: " r " ( c ) , " m " ( minusstart ) , " m " ( end ) , " m " ( minusindex ) \
: " % " REG_c , " %ebx " , " %edx " , " %esi " , " memory " \
) ;
return coeff_count ;
}
/**
 * Table-driven variant of decode_significance_x86() that iterates over
 * positions 0..62.
 *
 * For each position, the context for the first bin is
 * significant_coeff_ctx_base + sig_off[position]. If that bin is set, a
 * second bin is decoded with the context at
 * significant_coeff_ctx_base + last_coeff_flag_offset_8x8[position] + 15
 * (last_coeff_flag_offset_8x8 is a table defined outside this header), and
 * the position is stored through index. A set second bin ends the loop;
 * otherwise index advances by one int. If the loop runs to the end, the
 * final position (63) is stored as well.
 *
 * @param c                         decoder context; low, range and bytestream are updated
 * @param significant_coeff_ctx_base base of the context state bytes
 * @param index                     output array receiving the recorded positions
 * @param sig_off                   per-position context offset table for the first bin
 * @return the number of entries written to index
 */
static int decode_significance_8x8_x86 ( CABACContext * c , uint8_t * significant_coeff_ctx_base , int * index , uint8_t * sig_off ) {
// Used at the end to turn the last written index address into an entry count.
int minusindex = 4 - ( int ) index ;
int coeff_count ;
// Current position; kept in memory and reloaded by the asm as operand %1.
long last = 0 ;
asm volatile (
" movl " RANGE " (%3), %%esi \n \t "
" movl " LOW " (%3), %%ebx \n \t "
" mov %1, %% " REG_D " \n \t "
" 2: \n \t "
" mov %6, %% " REG_a " \n \t "
" movzbl (%% " REG_a " , %% " REG_D " ), %%edi \n \t "
" add %5, %% " REG_D " \n \t "
BRANCHLESS_GET_CABAC ( " %%edx " , " %3 " , " (%% " REG_D " ) " , " %%ebx " , " %%bx " , " %%esi " , " %%eax " , " %%al " )
" mov %1, %%edi \n \t "
" test $1, %%edx \n \t "
" jz 3f \n \t "
" movzbl " MANGLE ( last_coeff_flag_offset_8x8 ) " (%%edi), %%edi \n \t "
" add %5, %% " REG_D " \n \t "
BRANCHLESS_GET_CABAC ( " %%edx " , " %3 " , " 15(%% " REG_D " ) " , " %%ebx " , " %%bx " , " %%esi " , " %%eax " , " %%al " )
" mov %2, %% " REG_a " \n \t "
" mov %1, %%edi \n \t "
" movl %%edi, (%% " REG_a " ) \n \t "
" test $1, %%edx \n \t "
" jnz 4f \n \t "
" add $4, %% " REG_a " \n \t "
" mov %% " REG_a " , %2 \n \t "
" 3: \n \t "
" addl $1, %%edi \n \t "
" mov %%edi, %1 \n \t "
" cmpl $63, %%edi \n \t "
" jb 2b \n \t "
" mov %2, %% " REG_a " \n \t "
" movl %%edi, (%% " REG_a " ) \n \t "
" 4: \n \t "
" addl %4, %%eax \n \t "
" shr $2, %%eax \n \t "
" movl %%esi, " RANGE " (%3) \n \t "
" movl %%ebx, " LOW " (%3) \n \t "
: " =&a " ( coeff_count ) , " +m " ( last ) , " +m " ( index ) \
: " r " ( c ) , " m " ( minusindex ) , " m " ( significant_coeff_ctx_base ) , " m " ( sig_off ) \
: " % " REG_c , " %ebx " , " %edx " , " %esi " , " % " REG_D , " memory " \
) ;
return coeff_count ;
}
# endif /* defined(ARCH_X86) && !(defined(PIC) && defined(__GNUC__)) */
/**
 * Decode the terminate bin.
 *
 * @param c decoder context; range is always reduced by 2
 * @return the number of bytes read (current read position minus
 *         bytestream_start) if the terminate bin is set, or 0 if no end
 */
static int get_cabac_terminate(CABACContext *c)
{
    c->range -= 2;

    /* low at or above the scaled range means the terminate bin is set. */
    if (c->low >= (c->range << 17))
        return c->bytestream - c->bytestream_start;

    renorm_cabac_decoder_once(c);
    return 0;
}
/**
 * get (truncated) unary binarization.
 *
 * Counts context-coded one-bins until a zero-bin is read or max bins have
 * been consumed.
 *
 * @param c         decoder context
 * @param state     first context state; advances by one after each of the
 *                  first max_index bins
 * @param max       maximum number of bins to read
 * @param max_index number of times the state pointer may advance
 * @param truncated non-zero if max one-bins without a terminator is valid
 * @return the number of one-bins before the zero-bin; if max one-bins were
 *         read, max when truncated is set and -1 otherwise
 */
static int get_cabac_u(CABACContext *c, uint8_t *state, int max, int max_index, int truncated)
{
    int count = 0;

    while (count < max) {
        if (!get_cabac(c, state))
            return count;
        if (count < max_index)
            state++;
        count++;
    }

    if (truncated)
        return max;
    return -1;
}
/**
 * get unary exp golomb k-th order binarization.
 *
 * Reads a context-coded unary prefix of at most max bins. If the prefix is
 * terminated by a zero-bin, its length is the magnitude. Otherwise a
 * bypass-coded Exp-Golomb suffix of order k follows and is added to the
 * prefix length. For signed values one more bypass bin selects the sign.
 *
 * The escape prefix is bounded: the step m is never doubled beyond 1 << 29.
 * Without the bound, a corrupt stream delivering a long run of 1 bins makes
 * m overflow (undefined behaviour). Once m has gone negative, the suffix
 * loop "while (m >>= 1)" never terminates under an arithmetic shift.
 * With the bound, both the prefix sum and the suffix stay below 1 << 29, so
 * their sum cannot overflow for any max below 1 << 30. Decoding is unchanged
 * for every magnitude below max + (1 << 29) - (1 << k).
 *
 * @param c         decoder context
 * @param state     first context state; advances by one after each of the
 *                  first max_index prefix bins
 * @param max       length of the unary prefix at which the suffix starts
 * @param is_signed non-zero if a sign bin follows a non-zero magnitude
 * @param k         order of the Exp-Golomb suffix; 1 << k must be representable
 * @param max_index number of times the state pointer may advance
 * @return the decoded value
 */
static int get_cabac_ueg(CABACContext *c, uint8_t *state, int max, int is_signed, int k, int max_index)
{
    /* Largest escape step that may still be doubled without overflowing. */
    enum { UEG_STEP_LIMIT = 1 << 29 };
    int i, v;
    int m = 1 << k;

    if (get_cabac(c, state) == 0)
        return 0;

    if (0 < max_index)
        state++;

    /* Context-coded unary prefix. */
    for (i = 1; i < max; i++) {
        if (get_cabac(c, state) == 0) {
            if (is_signed && get_cabac_bypass(c))
                return -i;
            return i;
        }
        if (i < max_index)
            state++;
    }

    /* Bypass-coded escape prefix; stops early on an over-long run of ones. */
    while (m < UEG_STEP_LIMIT && get_cabac_bypass(c)) {
        i += m;
        m += m;
    }

    /* Bypass-coded suffix: one bit for every doubling step left in m. */
    v = 0;
    while (m >>= 1)
        v += v + get_cabac_bypass(c);
    i += v;

    if (is_signed && get_cabac_bypass(c))
        return -i;
    return i;
}