|
|
|
@ -51,23 +51,23 @@ |
|
|
|
|
*/ |
|
|
|
|
static void clear_blocks_dcbz32_ppc(int16_t *blocks) |
|
|
|
|
{ |
|
|
|
|
register int misal = ((unsigned long)blocks & 0x00000010); |
|
|
|
|
register int misal = ((unsigned long) blocks & 0x00000010); |
|
|
|
|
register int i = 0; |
|
|
|
|
|
|
|
|
|
if (misal) { |
|
|
|
|
((unsigned long*)blocks)[0] = 0L; |
|
|
|
|
((unsigned long*)blocks)[1] = 0L; |
|
|
|
|
((unsigned long*)blocks)[2] = 0L; |
|
|
|
|
((unsigned long*)blocks)[3] = 0L; |
|
|
|
|
((unsigned long *) blocks)[0] = 0L; |
|
|
|
|
((unsigned long *) blocks)[1] = 0L; |
|
|
|
|
((unsigned long *) blocks)[2] = 0L; |
|
|
|
|
((unsigned long *) blocks)[3] = 0L; |
|
|
|
|
i += 16; |
|
|
|
|
} |
|
|
|
|
for ( ; i < sizeof(int16_t)*6*64-31 ; i += 32) { |
|
|
|
|
__asm__ volatile("dcbz %0,%1" : : "b" (blocks), "r" (i) : "memory"); |
|
|
|
|
} |
|
|
|
|
for (; i < sizeof(int16_t) * 6 * 64 - 31; i += 32) |
|
|
|
|
__asm__ volatile ("dcbz %0,%1" :: "b" (blocks), "r" (i) : "memory"); |
|
|
|
|
if (misal) { |
|
|
|
|
((unsigned long*)blocks)[188] = 0L; |
|
|
|
|
((unsigned long*)blocks)[189] = 0L; |
|
|
|
|
((unsigned long*)blocks)[190] = 0L; |
|
|
|
|
((unsigned long*)blocks)[191] = 0L; |
|
|
|
|
((unsigned long *) blocks)[188] = 0L; |
|
|
|
|
((unsigned long *) blocks)[189] = 0L; |
|
|
|
|
((unsigned long *) blocks)[190] = 0L; |
|
|
|
|
((unsigned long *) blocks)[191] = 0L; |
|
|
|
|
i += 16; |
|
|
|
|
} |
|
|
|
|
} |
|
|
|
@ -77,23 +77,23 @@ static void clear_blocks_dcbz32_ppc(int16_t *blocks) |
|
|
|
|
#if HAVE_DCBZL |
|
|
|
|
static void clear_blocks_dcbz128_ppc(int16_t *blocks) |
|
|
|
|
{ |
|
|
|
|
register int misal = ((unsigned long)blocks & 0x0000007f); |
|
|
|
|
register int misal = ((unsigned long) blocks & 0x0000007f); |
|
|
|
|
register int i = 0; |
|
|
|
|
|
|
|
|
|
if (misal) { |
|
|
|
|
/* We could probably also optimize this case,
|
|
|
|
|
* but there's not much point as the machines |
|
|
|
|
* aren't available yet (2003-06-26). */ |
|
|
|
|
memset(blocks, 0, sizeof(int16_t)*6*64); |
|
|
|
|
memset(blocks, 0, sizeof(int16_t) * 6 * 64); |
|
|
|
|
} else { |
|
|
|
|
for (; i < sizeof(int16_t) * 6 * 64; i += 128) |
|
|
|
|
__asm__ volatile ("dcbzl %0,%1" :: "b" (blocks), "r" (i) : "memory"); |
|
|
|
|
} |
|
|
|
|
else |
|
|
|
|
for ( ; i < sizeof(int16_t)*6*64 ; i += 128) { |
|
|
|
|
__asm__ volatile("dcbzl %0,%1" : : "b" (blocks), "r" (i) : "memory"); |
|
|
|
|
} |
|
|
|
|
} |
|
|
|
|
#else |
|
|
|
|
/*
 * Fallback variant: zero six 8x8 coefficient blocks
 * (6 * 64 int16_t = 768 bytes) at `blocks` with a plain memset().
 */
static void clear_blocks_dcbz128_ppc(int16_t *blocks)
{
    enum { TOTAL_BYTES = sizeof(int16_t) * 6 * 64 };

    memset(blocks, 0, TOTAL_BYTES);
}
|
|
|
|
#endif |
|
|
|
|
|
|
|
|
@ -110,9 +110,8 @@ static long check_dcbzl_effect(void) |
|
|
|
|
register long i = 0; |
|
|
|
|
long count = 0; |
|
|
|
|
|
|
|
|
|
if (!fakedata) { |
|
|
|
|
if (!fakedata) |
|
|
|
|
return 0L; |
|
|
|
|
} |
|
|
|
|
|
|
|
|
|
fakedata_middle = (fakedata + 512); |
|
|
|
|
|
|
|
|
@ -120,12 +119,11 @@ static long check_dcbzl_effect(void) |
|
|
|
|
|
|
|
|
|
/* Below the constraint "b" seems to mean "address base register"
|
|
|
|
|
* in gcc-3.3 / RS/6000 speaks. Seems to avoid using r0, so.... */ |
|
|
|
|
__asm__ volatile("dcbzl %0, %1" : : "b" (fakedata_middle), "r" (zero)); |
|
|
|
|
__asm__ volatile ("dcbzl %0, %1" :: "b" (fakedata_middle), "r" (zero)); |
|
|
|
|
|
|
|
|
|
for (i = 0; i < 1024 ; i ++) { |
|
|
|
|
if (fakedata[i] == (char)0) |
|
|
|
|
for (i = 0; i < 1024; i++) |
|
|
|
|
if (fakedata[i] == (char) 0) |
|
|
|
|
count++; |
|
|
|
|
} |
|
|
|
|
|
|
|
|
|
av_free(fakedata); |
|
|
|
|
|
|
|
|
@ -134,7 +132,7 @@ static long check_dcbzl_effect(void) |
|
|
|
|
#else |
|
|
|
|
/*
 * Stub variant from the `#else` branch: performs no probing and always
 * returns 0.
 */
static long check_dcbzl_effect(void)
{
    const long no_effect = 0;

    return no_effect;
}
|
|
|
|
#endif |
|
|
|
|
|
|
|
|
@ -144,7 +142,7 @@ av_cold void ff_dsputil_init_ppc(DSPContext *c, AVCodecContext *avctx) |
|
|
|
|
|
|
|
|
|
// common optimizations whether AltiVec is available or not
|
|
|
|
|
if (!high_bit_depth) { |
|
|
|
|
switch (check_dcbzl_effect()) { |
|
|
|
|
switch (check_dcbzl_effect()) { |
|
|
|
|
case 32: |
|
|
|
|
c->clear_blocks = clear_blocks_dcbz32_ppc; |
|
|
|
|
break; |
|
|
|
@ -153,7 +151,7 @@ av_cold void ff_dsputil_init_ppc(DSPContext *c, AVCodecContext *avctx) |
|
|
|
|
break; |
|
|
|
|
default: |
|
|
|
|
break; |
|
|
|
|
} |
|
|
|
|
} |
|
|
|
|
} |
|
|
|
|
|
|
|
|
|
if (PPC_ALTIVEC(av_get_cpu_flags())) { |
|
|
|
@ -172,11 +170,10 @@ av_cold void ff_dsputil_init_ppc(DSPContext *c, AVCodecContext *avctx) |
|
|
|
|
if (avctx->bits_per_raw_sample <= 8) { |
|
|
|
|
if ((avctx->idct_algo == FF_IDCT_AUTO) || |
|
|
|
|
(avctx->idct_algo == FF_IDCT_ALTIVEC)) { |
|
|
|
|
c->idct_put = ff_idct_put_altivec; |
|
|
|
|
c->idct_add = ff_idct_add_altivec; |
|
|
|
|
c->idct_put = ff_idct_put_altivec; |
|
|
|
|
c->idct_add = ff_idct_add_altivec; |
|
|
|
|
c->idct_permutation_type = FF_TRANSPOSE_IDCT_PERM; |
|
|
|
|
} |
|
|
|
|
} |
|
|
|
|
|
|
|
|
|
} |
|
|
|
|
} |
|
|
|
|