mirror of https://github.com/FFmpeg/FFmpeg.git
Originally committed as revision 10640 to svn://svn.ffmpeg.org/ffmpeg/trunk
parent 298726ba55
commit 89523beea4
14 changed files with 404 additions and 292 deletions
@@ -0,0 +1,95 @@
/*
 * This file is part of FFmpeg.
 *
 * FFmpeg is free software; you can redistribute it and/or
 * modify it under the terms of the GNU Lesser General Public
 * License as published by the Free Software Foundation; either
 * version 2.1 of the License, or (at your option) any later version.
 *
 * FFmpeg is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
 * Lesser General Public License for more details.
 *
 * You should have received a copy of the GNU Lesser General Public
 * License along with FFmpeg; if not, write to the Free Software
 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
 */


/**
 * @file check_altivec.c
 * Checks for AltiVec presence.
 */

#ifdef __APPLE__
#include <sys/sysctl.h>
#elif __AMIGAOS4__
#include <exec/exec.h>
#include <interfaces/exec.h>
#include <proto/exec.h>
#else
#include <signal.h>
#include <setjmp.h>

static sigjmp_buf jmpbuf;
static volatile sig_atomic_t canjump = 0;

static void sigill_handler (int sig)
{
    if (!canjump) {
        signal (sig, SIG_DFL);
        raise (sig);
    }

    canjump = 0;
    siglongjmp (jmpbuf, 1);
}
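
/*
 * Note: canjump guards against SIGILL arriving before sigsetjmp() has
 * armed jmpbuf; if the handler fires while un-armed, it restores the
 * default disposition and re-raises the signal instead of jumping.
 */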
#endif /* __APPLE__ */

/**
 * This function MAY rely on signal() or fork() in order to make sure AltiVec
 * is present.
 */

int has_altivec(void)
{
#ifdef __AMIGAOS4__
    ULONG result = 0;
    extern struct ExecIFace *IExec;

    IExec->GetCPUInfoTags(GCIT_VectorUnit, &result, TAG_DONE);
    if (result == VECTORTYPE_ALTIVEC) return 1;
    return 0;
#elif __APPLE__
    int sels[2] = {CTL_HW, HW_VECTORUNIT};
    int has_vu = 0;
    size_t len = sizeof(has_vu);
    int err;

    err = sysctl(sels, 2, &has_vu, &len, NULL, 0);

    if (err == 0) return (has_vu != 0);
    return 0;
#else
    /* Do it the brute-force way, borrowed from the libmpeg2 library. */
    {
        signal (SIGILL, sigill_handler);
        if (sigsetjmp (jmpbuf, 1)) {
            signal (SIGILL, SIG_DFL);
        } else {
            canjump = 1;

            /* enable the vector unit via VRSAVE (SPR 256), then execute one
               AltiVec instruction; a CPU without AltiVec raises SIGILL here */
            asm volatile ("mtspr 256, %0\n\t"
                          "vand %%v0, %%v0, %%v0"
                          :
                          : "r" (-1));

            signal (SIGILL, SIG_DFL);
            return 1;
        }
    }
    return 0;
#endif /* __AMIGAOS4__ */
}
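
A minimal caller sketch (illustrative only; init_altivec_dispatch and the
code-path comments are hypothetical, not part of this commit). The probe is
typically run once at init time and its result cached:

    extern int has_altivec(void);

    static void init_altivec_dispatch(void)   /* hypothetical helper */
    {
        if (has_altivec()) {
            /* install AltiVec code paths, e.g. v_resample16_altivec */
        } else {
            /* keep the scalar C fall-backs */
        }
    }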
@@ -0,0 +1,153 @@
/*
 * High quality image resampling with polyphase filters
 * Copyright (c) 2001 Fabrice Bellard.
 *
 * This file is part of FFmpeg.
 *
 * FFmpeg is free software; you can redistribute it and/or
 * modify it under the terms of the GNU Lesser General Public
 * License as published by the Free Software Foundation; either
 * version 2.1 of the License, or (at your option) any later version.
 *
 * FFmpeg is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
 * Lesser General Public License for more details.
 *
 * You should have received a copy of the GNU Lesser General Public
 * License along with FFmpeg; if not, write to the Free Software
 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
 */

/**
 * @file imgresample_altivec.c
 * High quality image resampling with polyphase filters - AltiVec bits
 */

#include "gcc_fixes.h"

typedef union {
    vector unsigned char v;
    unsigned char c[16];
} vec_uc_t;

typedef union {
    vector signed short v;
    signed short s[8];
} vec_ss_t;

void v_resample16_altivec(uint8_t *dst, int dst_width, const uint8_t *src,
                          int wrap, int16_t *filter)
{
    int sum, i;
    const uint8_t *s;
    vector unsigned char *tv, tmp, dstv, zero;
    vec_ss_t srchv[4], srclv[4], fv[4];
    vector signed short zeros, sumhv, sumlv;
    s = src;

    for(i=0;i<4;i++)
    {
        /*
           The vec_madds later on does an implicit >>15 on the result.
           Since FILTER_BITS is 8, and we have 15 bits of magnitude in
           a signed short, we have just enough bits to pre-shift our
           filter constants <<7 to compensate for vec_madds.
        */
        fv[i].s[0] = filter[i] << (15-FILTER_BITS);
        fv[i].v = vec_splat(fv[i].v, 0);
    }
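
    /*
       Worked example of that compensation (added for illustration):
       with FILTER_BITS == 8, a coefficient f is stored as f << 7, so
       vec_madds computes roughly (s * (f << 7)) >> 15 == (s * f) >> 8,
       matching the scalar "sum >> FILTER_BITS" paths below (vec_madds
       additionally rounds and saturates, which plain shifts do not).
    */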

    zero = vec_splat_u8(0);
    zeros = vec_splat_s16(0);


    /*
       When we're resampling, we'd ideally like both our input and output
       buffers to be 16-byte aligned, so we can do both aligned reads and
       writes. Sadly we can't always have this at the moment, so we opt
       for aligned writes, as unaligned writes have a huge overhead.
       To do this, do enough scalar resamples to get dst 16-byte aligned.
    */
    i = (-(int)dst) & 0xf;
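    /*
       Concrete example (added for illustration): if dst ends in 0x9,
       then i = (-0x9) & 0xf = 7, so the scalar loop below emits 7
       pixels and dst lands on a 16-byte boundary.
    */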
    while(i>0) {
        sum = s[0 * wrap] * filter[0] +
              s[1 * wrap] * filter[1] +
              s[2 * wrap] * filter[2] +
              s[3 * wrap] * filter[3];
        sum = sum >> FILTER_BITS;
        if (sum<0) sum = 0; else if (sum>255) sum=255;
        dst[0] = sum;
        dst++;
        s++;
        dst_width--;
        i--;
    }

    /* Do our AltiVec resampling on 16 pixels at once. */
    while(dst_width>=16) {
        /*
           Read 16 (potentially unaligned) bytes from each of
           4 lines into 4 vectors, and split them into shorts.
           Interleave the multiply/accumulate for the resample
           filter with the loads to hide the 3 cycle latency
           the vec_madds have.
        */
        tv = (vector unsigned char *) &s[0 * wrap];
        tmp = vec_perm(tv[0], tv[1], vec_lvsl(0, &s[0 * wrap]));
        srchv[0].v = (vector signed short) vec_mergeh(zero, tmp);
        srclv[0].v = (vector signed short) vec_mergel(zero, tmp);
        sumhv = vec_madds(srchv[0].v, fv[0].v, zeros);
        sumlv = vec_madds(srclv[0].v, fv[0].v, zeros);

        tv = (vector unsigned char *) &s[1 * wrap];
        tmp = vec_perm(tv[0], tv[1], vec_lvsl(0, &s[1 * wrap]));
        srchv[1].v = (vector signed short) vec_mergeh(zero, tmp);
        srclv[1].v = (vector signed short) vec_mergel(zero, tmp);
        sumhv = vec_madds(srchv[1].v, fv[1].v, sumhv);
        sumlv = vec_madds(srclv[1].v, fv[1].v, sumlv);

        tv = (vector unsigned char *) &s[2 * wrap];
        tmp = vec_perm(tv[0], tv[1], vec_lvsl(0, &s[2 * wrap]));
        srchv[2].v = (vector signed short) vec_mergeh(zero, tmp);
        srclv[2].v = (vector signed short) vec_mergel(zero, tmp);
        sumhv = vec_madds(srchv[2].v, fv[2].v, sumhv);
        sumlv = vec_madds(srclv[2].v, fv[2].v, sumlv);

        tv = (vector unsigned char *) &s[3 * wrap];
        tmp = vec_perm(tv[0], tv[1], vec_lvsl(0, &s[3 * wrap]));
        srchv[3].v = (vector signed short) vec_mergeh(zero, tmp);
        srclv[3].v = (vector signed short) vec_mergel(zero, tmp);
        sumhv = vec_madds(srchv[3].v, fv[3].v, sumhv);
        sumlv = vec_madds(srclv[3].v, fv[3].v, sumlv);

        /*
           Pack the results into our destination vector,
           and do an aligned write of that back to memory.
        */
        dstv = vec_packsu(sumhv, sumlv);
        vec_st(dstv, 0, (vector unsigned char *) dst);

        dst+=16;
        s+=16;
        dst_width-=16;
    }

    /*
       If there are any leftover pixels, resample them
       with the slow scalar method.
    */
    while(dst_width>0) {
        sum = s[0 * wrap] * filter[0] +
              s[1 * wrap] * filter[1] +
              s[2 * wrap] * filter[2] +
              s[3 * wrap] * filter[3];
        sum = sum >> FILTER_BITS;
        if (sum<0) sum = 0; else if (sum>255) sum=255;
        dst[0] = sum;
        dst++;
        s++;
        dst_width--;
    }
}
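
A hedged usage sketch (assumptions: FILTER_BITS is 8 as the comment above
states; the buffer names and coefficient values here are illustrative and
not taken from this commit):

    int16_t filter[4] = { 8, 120, 120, 8 };  /* taps sum to 1 << FILTER_BITS */
    /* produce one output line from 4 input lines spaced 'wrap' bytes apart */
    v_resample16_altivec(dst_line, dst_width, src_lines, wrap, filter);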
@@ -0,0 +1,24 @@
/*
 * This file is part of FFmpeg.
 *
 * FFmpeg is free software; you can redistribute it and/or
 * modify it under the terms of the GNU Lesser General Public
 * License as published by the Free Software Foundation; either
 * version 2.1 of the License, or (at your option) any later version.
 *
 * FFmpeg is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
 * Lesser General Public License for more details.
 *
 * You should have received a copy of the GNU Lesser General Public
 * License along with FFmpeg; if not, write to the Free Software
 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
 */

#ifndef IMGRESAMPLE_ALTIVEC_H
#define IMGRESAMPLE_ALTIVEC_H

void v_resample16_altivec(uint8_t *dst, int dst_width, const uint8_t *src,
                          int wrap, int16_t *filter);
#endif /* IMGRESAMPLE_ALTIVEC_H */
@@ -0,0 +1,106 @@
/*
 * This file is part of FFmpeg.
 *
 * FFmpeg is free software; you can redistribute it and/or
 * modify it under the terms of the GNU Lesser General Public
 * License as published by the Free Software Foundation; either
 * version 2.1 of the License, or (at your option) any later version.
 *
 * FFmpeg is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
 * Lesser General Public License for more details.
 *
 * You should have received a copy of the GNU Lesser General Public
 * License along with FFmpeg; if not, write to the Free Software
 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
 */

/**
 * @file util_altivec.h
 * Contains misc utility macros and inline functions
 */

#ifndef UTIL_ALTIVEC_H
#define UTIL_ALTIVEC_H

// used to build register permutation vectors (vcprm)
// the 's' entries select words from the _s_econd source vector
#define WORD_0 0x00,0x01,0x02,0x03
#define WORD_1 0x04,0x05,0x06,0x07
#define WORD_2 0x08,0x09,0x0a,0x0b
#define WORD_3 0x0c,0x0d,0x0e,0x0f
#define WORD_s0 0x10,0x11,0x12,0x13
#define WORD_s1 0x14,0x15,0x16,0x17
#define WORD_s2 0x18,0x19,0x1a,0x1b
#define WORD_s3 0x1c,0x1d,0x1e,0x1f

#ifdef __APPLE_CC__
#define vcprm(a,b,c,d) (const vector unsigned char)(WORD_ ## a, WORD_ ## b, WORD_ ## c, WORD_ ## d)
#else
#define vcprm(a,b,c,d) (const vector unsigned char){WORD_ ## a, WORD_ ## b, WORD_ ## c, WORD_ ## d}
#endif
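
// Illustrative example (not in the original header): with word vectors
// a = {a0,a1,a2,a3} and b = {b0,b1,b2,b3},
//     vec_perm(a, b, vcprm(0,1,s0,s1))
// yields {a0,a1,b0,b1}: plain indices pick words from the first
// operand, 's' indices from the second.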

// vcprmle is used to keep the same index as in the SSE version.
// it's the same as vcprm, with the indexes reversed
// ('le' is Little Endian)
#define vcprmle(a,b,c,d) vcprm(d,c,b,a)

// used to build inverse/identity vectors (vcii)
// n is _n_egative, p is _p_ositive
#define FLOAT_n -1.
#define FLOAT_p 1.


#ifdef __APPLE_CC__
#define vcii(a,b,c,d) (const vector float)(FLOAT_ ## a, FLOAT_ ## b, FLOAT_ ## c, FLOAT_ ## d)
#else
#define vcii(a,b,c,d) (const vector float){FLOAT_ ## a, FLOAT_ ## b, FLOAT_ ## c, FLOAT_ ## d}
#endif
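
// For example, vcii(p,p,n,p) expands to the constant vector
// {+1.0, +1.0, -1.0, +1.0}, handy as a sign pattern for multiplies.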

// Transpose 8x8 matrix of 16-bit elements (in-place)
#define TRANSPOSE8(a,b,c,d,e,f,g,h) \
do { \
    vector signed short A1, B1, C1, D1, E1, F1, G1, H1; \
    vector signed short A2, B2, C2, D2, E2, F2, G2, H2; \
 \
    A1 = vec_mergeh (a, e); \
    B1 = vec_mergel (a, e); \
    C1 = vec_mergeh (b, f); \
    D1 = vec_mergel (b, f); \
    E1 = vec_mergeh (c, g); \
    F1 = vec_mergel (c, g); \
    G1 = vec_mergeh (d, h); \
    H1 = vec_mergel (d, h); \
 \
    A2 = vec_mergeh (A1, E1); \
    B2 = vec_mergel (A1, E1); \
    C2 = vec_mergeh (B1, F1); \
    D2 = vec_mergel (B1, F1); \
    E2 = vec_mergeh (C1, G1); \
    F2 = vec_mergel (C1, G1); \
    G2 = vec_mergeh (D1, H1); \
    H2 = vec_mergel (D1, H1); \
 \
    a = vec_mergeh (A2, E2); \
    b = vec_mergel (A2, E2); \
    c = vec_mergeh (B2, F2); \
    d = vec_mergel (B2, F2); \
    e = vec_mergeh (C2, G2); \
    f = vec_mergel (C2, G2); \
    g = vec_mergeh (D2, H2); \
    h = vec_mergel (D2, H2); \
} while (0)
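
// Usage sketch (assumed, not part of the original header):
//     vector signed short r0, r1, r2, r3, r4, r5, r6, r7; // 8 rows of int16
//     TRANSPOSE8(r0, r1, r2, r3, r4, r5, r6, r7);
//     // r0..r7 now hold the columns of the original 8x8 block
// Three rounds of mergeh/mergel (log2(8) interleave passes) complete the
// transpose entirely in registers, with no loads or stores.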


/** \brief loads unaligned vector \a *src with offset \a offset
    and returns it */
static inline vector unsigned char unaligned_load(int offset, uint8_t *src)
{
    register vector unsigned char first = vec_ld(offset, src);
    register vector unsigned char second = vec_ld(offset+15, src);
    register vector unsigned char mask = vec_lvsl(offset, src);
    return vec_perm(first, second, mask);
}
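
/*
 * How this works: vec_ld ignores the low four address bits, so the two
 * loads fetch the aligned 16-byte blocks straddling src+offset, and
 * vec_lvsl builds the permute mask that shifts the wanted bytes into
 * place. The resample loop above uses the same idiom inline.
 */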

#endif /* UTIL_ALTIVEC_H */