Mirror of https://github.com/opencv/opencv.git
Commit 740941c8b8 (parent db45e04d58): 64 changed files with 5337 additions and 2716 deletions
@@ -0,0 +1,661 @@
// Copyright 2012 Google Inc. All Rights Reserved.
//
// This code is licensed under the same terms as WebM:
// Software License Agreement: http://www.webmproject.org/license/software/
// Additional IP Rights Grant: http://www.webmproject.org/license/additional/
// -----------------------------------------------------------------------------
//
// ARM NEON version of speed-critical encoding functions.
//
// adapted from libvpx (http://www.webmproject.org/code/)

#include "./dsp.h"

#if defined(__cplusplus) || defined(c_plusplus)
extern "C" {
#endif

#if defined(WEBP_USE_NEON)

#include "../enc/vp8enci.h"

//------------------------------------------------------------------------------
// Transforms (Paragraph 14.4)

// Inverse transform.
// This code is pretty much the same as TransformOneNEON in the decoder, except
// for subtraction to *ref. See the comments there for algorithmic explanations.
static void ITransformOne(const uint8_t* ref, |
||||
const int16_t* in, uint8_t* dst) { |
||||
const int kBPS = BPS; |
||||
const int16_t kC1C2[] = { 20091, 17734, 0, 0 }; // kC1 / (kC2 >> 1) / 0 / 0
|
||||
|
||||
__asm__ volatile ( |
||||
"vld1.16 {q1, q2}, [%[in]] \n" |
||||
"vld1.16 {d0}, [%[kC1C2]] \n" |
||||
|
||||
// d2: in[0]
|
||||
// d3: in[8]
|
||||
// d4: in[4]
|
||||
// d5: in[12]
|
||||
"vswp d3, d4 \n" |
||||
|
||||
// q8 = {in[4], in[12]} * kC1 * 2 >> 16
|
||||
// q9 = {in[4], in[12]} * kC2 >> 16
|
||||
"vqdmulh.s16 q8, q2, d0[0] \n" |
||||
"vqdmulh.s16 q9, q2, d0[1] \n" |
||||
|
||||
// d22 = a = in[0] + in[8]
|
||||
// d23 = b = in[0] - in[8]
|
||||
"vqadd.s16 d22, d2, d3 \n" |
||||
"vqsub.s16 d23, d2, d3 \n" |
||||
|
||||
// q8 = in[4]/[12] * kC1 >> 16
|
||||
"vshr.s16 q8, q8, #1 \n" |
||||
|
||||
// Add {in[4], in[12]} back after the multiplication.
|
||||
"vqadd.s16 q8, q2, q8 \n" |
||||
|
||||
// d20 = c = in[4]*kC2 - in[12]*kC1
|
||||
// d21 = d = in[4]*kC1 + in[12]*kC2
|
||||
"vqsub.s16 d20, d18, d17 \n" |
||||
"vqadd.s16 d21, d19, d16 \n" |
||||
|
||||
// d2 = tmp[0] = a + d
|
||||
// d3 = tmp[1] = b + c
|
||||
// d4 = tmp[2] = b - c
|
||||
// d5 = tmp[3] = a - d
|
||||
"vqadd.s16 d2, d22, d21 \n" |
||||
"vqadd.s16 d3, d23, d20 \n" |
||||
"vqsub.s16 d4, d23, d20 \n" |
||||
"vqsub.s16 d5, d22, d21 \n" |
||||
|
||||
"vzip.16 q1, q2 \n" |
||||
"vzip.16 q1, q2 \n" |
||||
|
||||
"vswp d3, d4 \n" |
||||
|
||||
// q8 = {tmp[4], tmp[12]} * kC1 * 2 >> 16
|
||||
// q9 = {tmp[4], tmp[12]} * kC2 >> 16
|
||||
"vqdmulh.s16 q8, q2, d0[0] \n" |
||||
"vqdmulh.s16 q9, q2, d0[1] \n" |
||||
|
||||
// d22 = a = tmp[0] + tmp[8]
|
||||
// d23 = b = tmp[0] - tmp[8]
|
||||
"vqadd.s16 d22, d2, d3 \n" |
||||
"vqsub.s16 d23, d2, d3 \n" |
||||
|
||||
"vshr.s16 q8, q8, #1 \n" |
||||
"vqadd.s16 q8, q2, q8 \n" |
||||
|
||||
// d20 = c = in[4]*kC2 - in[12]*kC1
|
||||
// d21 = d = in[4]*kC1 + in[12]*kC2
|
||||
"vqsub.s16 d20, d18, d17 \n" |
||||
"vqadd.s16 d21, d19, d16 \n" |
||||
|
||||
// d2 = tmp[0] = a + d
|
||||
// d3 = tmp[1] = b + c
|
||||
// d4 = tmp[2] = b - c
|
||||
// d5 = tmp[3] = a - d
|
||||
"vqadd.s16 d2, d22, d21 \n" |
||||
"vqadd.s16 d3, d23, d20 \n" |
||||
"vqsub.s16 d4, d23, d20 \n" |
||||
"vqsub.s16 d5, d22, d21 \n" |
||||
|
||||
"vld1.32 d6[0], [%[ref]], %[kBPS] \n" |
||||
"vld1.32 d6[1], [%[ref]], %[kBPS] \n" |
||||
"vld1.32 d7[0], [%[ref]], %[kBPS] \n" |
||||
"vld1.32 d7[1], [%[ref]], %[kBPS] \n" |
||||
|
||||
"sub %[ref], %[ref], %[kBPS], lsl #2 \n" |
||||
|
||||
// (val) + 4 >> 3
|
||||
"vrshr.s16 d2, d2, #3 \n" |
||||
"vrshr.s16 d3, d3, #3 \n" |
||||
"vrshr.s16 d4, d4, #3 \n" |
||||
"vrshr.s16 d5, d5, #3 \n" |
||||
|
||||
"vzip.16 q1, q2 \n" |
||||
"vzip.16 q1, q2 \n" |
||||
|
||||
// Must accumulate before saturating
|
||||
"vmovl.u8 q8, d6 \n" |
||||
"vmovl.u8 q9, d7 \n" |
||||
|
||||
"vqadd.s16 q1, q1, q8 \n" |
||||
"vqadd.s16 q2, q2, q9 \n" |
||||
|
||||
"vqmovun.s16 d0, q1 \n" |
||||
"vqmovun.s16 d1, q2 \n" |
||||
|
||||
"vst1.32 d0[0], [%[dst]], %[kBPS] \n" |
||||
"vst1.32 d0[1], [%[dst]], %[kBPS] \n" |
||||
"vst1.32 d1[0], [%[dst]], %[kBPS] \n" |
||||
"vst1.32 d1[1], [%[dst]] \n" |
||||
|
||||
: [in] "+r"(in), [dst] "+r"(dst) // modified registers
|
||||
: [kBPS] "r"(kBPS), [kC1C2] "r"(kC1C2), [ref] "r"(ref) // constants
|
||||
: "memory", "q0", "q1", "q2", "q8", "q9", "q10", "q11" // clobbered
|
||||
); |
||||
} |
||||
|
||||
static void ITransform(const uint8_t* ref, |
||||
const int16_t* in, uint8_t* dst, int do_two) { |
||||
ITransformOne(ref, in, dst); |
||||
if (do_two) { |
||||
ITransformOne(ref + 4, in + 16, dst + 4); |
||||
} |
||||
} |
||||
|
||||
// Same code as dec_neon.c
|
||||
static void ITransformWHT(const int16_t* in, int16_t* out) { |
||||
const int kStep = 32; // The store is only incrementing the pointer as if we
|
||||
// had stored a single byte.
|
||||
__asm__ volatile ( |
||||
// part 1
|
||||
// load data into q0, q1
|
||||
"vld1.16 {q0, q1}, [%[in]] \n" |
||||
|
||||
"vaddl.s16 q2, d0, d3 \n" // a0 = in[0] + in[12]
|
||||
"vaddl.s16 q3, d1, d2 \n" // a1 = in[4] + in[8]
|
||||
"vsubl.s16 q4, d1, d2 \n" // a2 = in[4] - in[8]
|
||||
"vsubl.s16 q5, d0, d3 \n" // a3 = in[0] - in[12]
|
||||
|
||||
"vadd.s32 q0, q2, q3 \n" // tmp[0] = a0 + a1
|
||||
"vsub.s32 q2, q2, q3 \n" // tmp[8] = a0 - a1
|
||||
"vadd.s32 q1, q5, q4 \n" // tmp[4] = a3 + a2
|
||||
"vsub.s32 q3, q5, q4 \n" // tmp[12] = a3 - a2
|
||||
|
||||
// Transpose
|
||||
// q0 = tmp[0, 4, 8, 12], q1 = tmp[2, 6, 10, 14]
|
||||
// q2 = tmp[1, 5, 9, 13], q3 = tmp[3, 7, 11, 15]
|
||||
"vswp d1, d4 \n" // vtrn.64 q0, q2
|
||||
"vswp d3, d6 \n" // vtrn.64 q1, q3
|
||||
"vtrn.32 q0, q1 \n" |
||||
"vtrn.32 q2, q3 \n" |
||||
|
||||
"vmov.s32 q4, #3 \n" // dc = 3
|
||||
"vadd.s32 q0, q0, q4 \n" // dc = tmp[0] + 3
|
||||
"vadd.s32 q6, q0, q3 \n" // a0 = dc + tmp[3]
|
||||
"vadd.s32 q7, q1, q2 \n" // a1 = tmp[1] + tmp[2]
|
||||
"vsub.s32 q8, q1, q2 \n" // a2 = tmp[1] - tmp[2]
|
||||
"vsub.s32 q9, q0, q3 \n" // a3 = dc - tmp[3]
|
||||
|
||||
"vadd.s32 q0, q6, q7 \n" |
||||
"vshrn.s32 d0, q0, #3 \n" // (a0 + a1) >> 3
|
||||
"vadd.s32 q1, q9, q8 \n" |
||||
"vshrn.s32 d1, q1, #3 \n" // (a3 + a2) >> 3
|
||||
"vsub.s32 q2, q6, q7 \n" |
||||
"vshrn.s32 d2, q2, #3 \n" // (a0 - a1) >> 3
|
||||
"vsub.s32 q3, q9, q8 \n" |
||||
"vshrn.s32 d3, q3, #3 \n" // (a3 - a2) >> 3
|
||||
|
||||
// set the results to output
|
||||
"vst1.16 d0[0], [%[out]], %[kStep] \n" |
||||
"vst1.16 d1[0], [%[out]], %[kStep] \n" |
||||
"vst1.16 d2[0], [%[out]], %[kStep] \n" |
||||
"vst1.16 d3[0], [%[out]], %[kStep] \n" |
||||
"vst1.16 d0[1], [%[out]], %[kStep] \n" |
||||
"vst1.16 d1[1], [%[out]], %[kStep] \n" |
||||
"vst1.16 d2[1], [%[out]], %[kStep] \n" |
||||
"vst1.16 d3[1], [%[out]], %[kStep] \n" |
||||
"vst1.16 d0[2], [%[out]], %[kStep] \n" |
||||
"vst1.16 d1[2], [%[out]], %[kStep] \n" |
||||
"vst1.16 d2[2], [%[out]], %[kStep] \n" |
||||
"vst1.16 d3[2], [%[out]], %[kStep] \n" |
||||
"vst1.16 d0[3], [%[out]], %[kStep] \n" |
||||
"vst1.16 d1[3], [%[out]], %[kStep] \n" |
||||
"vst1.16 d2[3], [%[out]], %[kStep] \n" |
||||
"vst1.16 d3[3], [%[out]], %[kStep] \n" |
||||
|
||||
: [out] "+r"(out) // modified registers
|
||||
: [in] "r"(in), [kStep] "r"(kStep) // constants
|
||||
: "memory", "q0", "q1", "q2", "q3", "q4", |
||||
"q5", "q6", "q7", "q8", "q9" // clobbered
|
||||
); |
||||
} |
||||
|
||||
// Forward transform.
|
||||
|
||||
// adapted from vp8/encoder/arm/neon/shortfdct_neon.asm
|
||||
static const int16_t kCoeff16[] = { |
||||
5352, 5352, 5352, 5352, 2217, 2217, 2217, 2217 |
||||
}; |
||||
static const int32_t kCoeff32[] = { |
||||
1812, 1812, 1812, 1812, |
||||
937, 937, 937, 937, |
||||
12000, 12000, 12000, 12000, |
||||
51000, 51000, 51000, 51000 |
||||
}; |
||||
|
||||
static void FTransform(const uint8_t* src, const uint8_t* ref, |
||||
int16_t* out) { |
||||
const int kBPS = BPS; |
||||
const uint8_t* src_ptr = src; |
||||
const uint8_t* ref_ptr = ref; |
||||
const int16_t* coeff16 = kCoeff16; |
||||
const int32_t* coeff32 = kCoeff32; |
||||
|
||||
__asm__ volatile ( |
||||
// load src into q4, q5 in high half
|
||||
"vld1.8 {d8}, [%[src_ptr]], %[kBPS] \n" |
||||
"vld1.8 {d10}, [%[src_ptr]], %[kBPS] \n" |
||||
"vld1.8 {d9}, [%[src_ptr]], %[kBPS] \n" |
||||
"vld1.8 {d11}, [%[src_ptr]] \n" |
||||
|
||||
// load ref into q6, q7 in high half
|
||||
"vld1.8 {d12}, [%[ref_ptr]], %[kBPS] \n" |
||||
"vld1.8 {d14}, [%[ref_ptr]], %[kBPS] \n" |
||||
"vld1.8 {d13}, [%[ref_ptr]], %[kBPS] \n" |
||||
"vld1.8 {d15}, [%[ref_ptr]] \n" |
||||
|
||||
// Pack the high values in to q4 and q6
|
||||
"vtrn.32 q4, q5 \n" |
||||
"vtrn.32 q6, q7 \n" |
||||
|
||||
// d[0-3] = src - ref
|
||||
"vsubl.u8 q0, d8, d12 \n" |
||||
"vsubl.u8 q1, d9, d13 \n" |
||||
|
||||
// load coeff16 into q8(d16=5352, d17=2217)
|
||||
"vld1.16 {q8}, [%[coeff16]] \n" |
||||
|
||||
// load coeff32 high half into q9 = 1812, q10 = 937
|
||||
"vld1.32 {q9, q10}, [%[coeff32]]! \n" |
||||
|
||||
// load coeff32 low half into q11=12000, q12=51000
|
||||
"vld1.32 {q11,q12}, [%[coeff32]] \n" |
||||
|
||||
// part 1
|
||||
// Transpose. Register dN is the same as dN in C
|
||||
"vtrn.32 d0, d2 \n" |
||||
"vtrn.32 d1, d3 \n" |
||||
"vtrn.16 d0, d1 \n" |
||||
"vtrn.16 d2, d3 \n" |
||||
|
||||
"vadd.s16 d4, d0, d3 \n" // a0 = d0 + d3
|
||||
"vadd.s16 d5, d1, d2 \n" // a1 = d1 + d2
|
||||
"vsub.s16 d6, d1, d2 \n" // a2 = d1 - d2
|
||||
"vsub.s16 d7, d0, d3 \n" // a3 = d0 - d3
|
||||
|
||||
"vadd.s16 d0, d4, d5 \n" // a0 + a1
|
||||
"vshl.s16 d0, d0, #3 \n" // temp[0+i*4] = (a0+a1) << 3
|
||||
"vsub.s16 d2, d4, d5 \n" // a0 - a1
|
||||
"vshl.s16 d2, d2, #3 \n" // (temp[2+i*4] = (a0-a1) << 3
|
||||
|
||||
"vmlal.s16 q9, d7, d16 \n" // a3*5352 + 1812
|
||||
"vmlal.s16 q10, d7, d17 \n" // a3*2217 + 937
|
||||
"vmlal.s16 q9, d6, d17 \n" // a2*2217 + a3*5352 + 1812
|
||||
"vmlsl.s16 q10, d6, d16 \n" // a3*2217 + 937 - a2*5352
|
||||
|
||||
// temp[1+i*4] = (d2*2217 + d3*5352 + 1812) >> 9
|
||||
// temp[3+i*4] = (d3*2217 + 937 - d2*5352) >> 9
|
||||
"vshrn.s32 d1, q9, #9 \n" |
||||
"vshrn.s32 d3, q10, #9 \n" |
||||
|
||||
// part 2
|
||||
// transpose d0=ip[0], d1=ip[4], d2=ip[8], d3=ip[12]
|
||||
"vtrn.32 d0, d2 \n" |
||||
"vtrn.32 d1, d3 \n" |
||||
"vtrn.16 d0, d1 \n" |
||||
"vtrn.16 d2, d3 \n" |
||||
|
||||
"vmov.s16 d26, #7 \n" |
||||
|
||||
"vadd.s16 d4, d0, d3 \n" // a1 = ip[0] + ip[12]
|
||||
"vadd.s16 d5, d1, d2 \n" // b1 = ip[4] + ip[8]
|
||||
"vsub.s16 d6, d1, d2 \n" // c1 = ip[4] - ip[8]
|
||||
"vadd.s16 d4, d4, d26 \n" // a1 + 7
|
||||
"vsub.s16 d7, d0, d3 \n" // d1 = ip[0] - ip[12]
|
||||
|
||||
"vadd.s16 d0, d4, d5 \n" // op[0] = a1 + b1 + 7
|
||||
"vsub.s16 d2, d4, d5 \n" // op[8] = a1 - b1 + 7
|
||||
|
||||
"vmlal.s16 q11, d7, d16 \n" // d1*5352 + 12000
|
||||
"vmlal.s16 q12, d7, d17 \n" // d1*2217 + 51000
|
||||
|
||||
"vceq.s16 d4, d7, #0 \n" |
||||
|
||||
"vshr.s16 d0, d0, #4 \n" |
||||
"vshr.s16 d2, d2, #4 \n" |
||||
|
||||
"vmlal.s16 q11, d6, d17 \n" // c1*2217 + d1*5352 + 12000
|
||||
"vmlsl.s16 q12, d6, d16 \n" // d1*2217 - c1*5352 + 51000
|
||||
|
||||
"vmvn.s16 d4, d4 \n" |
||||
// op[4] = (c1*2217 + d1*5352 + 12000)>>16
|
||||
"vshrn.s32 d1, q11, #16 \n" |
||||
// op[4] += (d1!=0)
|
||||
"vsub.s16 d1, d1, d4 \n" |
||||
// op[12]= (d1*2217 - c1*5352 + 51000)>>16
|
||||
"vshrn.s32 d3, q12, #16 \n" |
||||
|
||||
// set result to out array
|
||||
"vst1.16 {q0, q1}, [%[out]] \n" |
||||
: [src_ptr] "+r"(src_ptr), [ref_ptr] "+r"(ref_ptr), |
||||
[coeff32] "+r"(coeff32) // modified registers
|
||||
: [kBPS] "r"(kBPS), [coeff16] "r"(coeff16), |
||||
[out] "r"(out) // constants
|
||||
: "memory", "q0", "q1", "q2", "q3", "q4", "q5", "q6", "q7", "q8", "q9", |
||||
"q10", "q11", "q12", "q13" // clobbered
|
||||
); |
||||
} |
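// For reference when reading the assembly above: a plain C sketch of the same
// forward transform, reconstructed purely from the inline comments (constants
// 5352/2217 and rounding terms 1812, 937, 12000, 51000). The function name is
// illustrative only; this is not the library's reference implementation.
static void FTransformScalarSketch(const uint8_t* src, const uint8_t* ref,
                                   int16_t* out) {
  int tmp[16];
  int i;
  // Pass 1: rows of the 4x4 residual (src - ref).
  for (i = 0; i < 4; ++i, src += BPS, ref += BPS) {
    const int d0 = src[0] - ref[0];
    const int d1 = src[1] - ref[1];
    const int d2 = src[2] - ref[2];
    const int d3 = src[3] - ref[3];
    const int a0 = d0 + d3;
    const int a1 = d1 + d2;
    const int a2 = d1 - d2;
    const int a3 = d0 - d3;
    tmp[0 + i * 4] = (a0 + a1) << 3;
    tmp[1 + i * 4] = (a2 * 2217 + a3 * 5352 + 1812) >> 9;
    tmp[2 + i * 4] = (a0 - a1) << 3;
    tmp[3 + i * 4] = (a3 * 2217 - a2 * 5352 +  937) >> 9;
  }
  // Pass 2: columns of the intermediate result.
  for (i = 0; i < 4; ++i) {
    const int a1 = tmp[i + 0] + tmp[i + 12];
    const int b1 = tmp[i + 4] + tmp[i +  8];
    const int c1 = tmp[i + 4] - tmp[i +  8];
    const int d1 = tmp[i + 0] - tmp[i + 12];
    out[i +  0] = (a1 + b1 + 7) >> 4;
    out[i +  4] = ((c1 * 2217 + d1 * 5352 + 12000) >> 16) + (d1 != 0);
    out[i +  8] = (a1 - b1 + 7) >> 4;
    out[i + 12] = (d1 * 2217 - c1 * 5352 + 51000) >> 16;
  }
}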
||||
|
||||
static void FTransformWHT(const int16_t* in, int16_t* out) { |
||||
const int kStep = 32; |
||||
__asm__ volatile ( |
||||
// d0 = in[0 * 16] , d1 = in[1 * 16]
|
||||
// d2 = in[2 * 16] , d3 = in[3 * 16]
|
||||
"vld1.16 d0[0], [%[in]], %[kStep] \n" |
||||
"vld1.16 d1[0], [%[in]], %[kStep] \n" |
||||
"vld1.16 d2[0], [%[in]], %[kStep] \n" |
||||
"vld1.16 d3[0], [%[in]], %[kStep] \n" |
||||
"vld1.16 d0[1], [%[in]], %[kStep] \n" |
||||
"vld1.16 d1[1], [%[in]], %[kStep] \n" |
||||
"vld1.16 d2[1], [%[in]], %[kStep] \n" |
||||
"vld1.16 d3[1], [%[in]], %[kStep] \n" |
||||
"vld1.16 d0[2], [%[in]], %[kStep] \n" |
||||
"vld1.16 d1[2], [%[in]], %[kStep] \n" |
||||
"vld1.16 d2[2], [%[in]], %[kStep] \n" |
||||
"vld1.16 d3[2], [%[in]], %[kStep] \n" |
||||
"vld1.16 d0[3], [%[in]], %[kStep] \n" |
||||
"vld1.16 d1[3], [%[in]], %[kStep] \n" |
||||
"vld1.16 d2[3], [%[in]], %[kStep] \n" |
||||
"vld1.16 d3[3], [%[in]], %[kStep] \n" |
||||
|
||||
"vaddl.s16 q2, d0, d2 \n" |
||||
"vshl.s32 q2, q2, #2 \n" // a0=(in[0*16]+in[2*16])<<2
|
||||
"vaddl.s16 q3, d1, d3 \n" |
||||
"vshl.s32 q3, q3, #2 \n" // a1=(in[1*16]+in[3*16])<<2
|
||||
"vsubl.s16 q4, d1, d3 \n" |
||||
"vshl.s32 q4, q4, #2 \n" // a2=(in[1*16]-in[3*16])<<2
|
||||
"vsubl.s16 q5, d0, d2 \n" |
||||
"vshl.s32 q5, q5, #2 \n" // a3=(in[0*16]-in[2*16])<<2
|
||||
|
||||
"vceq.s32 q10, q2, #0 \n" |
||||
"vmvn.s32 q10, q10 \n" // (a0 != 0)
|
||||
"vqadd.s32 q6, q2, q3 \n" // (a0 + a1)
|
||||
"vqsub.s32 q6, q6, q10 \n" // (a0 + a1) + (a0 != 0)
|
||||
"vqadd.s32 q7, q5, q4 \n" // a3 + a2
|
||||
"vqsub.s32 q8, q5, q4 \n" // a3 - a2
|
||||
"vqsub.s32 q9, q2, q3 \n" // a0 - a1
|
||||
|
||||
// Transpose
|
||||
// q6 = tmp[0, 1, 2, 3] ; q7 = tmp[ 4, 5, 6, 7]
|
||||
// q8 = tmp[8, 9, 10, 11] ; q9 = tmp[12, 13, 14, 15]
|
||||
"vswp d13, d16 \n" // vtrn.64 q0, q2
|
||||
"vswp d15, d18 \n" // vtrn.64 q1, q3
|
||||
"vtrn.32 q6, q7 \n" |
||||
"vtrn.32 q8, q9 \n" |
||||
|
||||
"vqadd.s32 q0, q6, q8 \n" // a0 = tmp[0] + tmp[8]
|
||||
"vqadd.s32 q1, q7, q9 \n" // a1 = tmp[4] + tmp[12]
|
||||
"vqsub.s32 q2, q7, q9 \n" // a2 = tmp[4] - tmp[12]
|
||||
"vqsub.s32 q3, q6, q8 \n" // a3 = tmp[0] - tmp[8]
|
||||
|
||||
"vqadd.s32 q4, q0, q1 \n" // b0 = a0 + a1
|
||||
"vqadd.s32 q5, q3, q2 \n" // b1 = a3 + a2
|
||||
"vqsub.s32 q6, q3, q2 \n" // b2 = a3 - a2
|
||||
"vqsub.s32 q7, q0, q1 \n" // b3 = a0 - a1
|
||||
|
||||
"vmov.s32 q0, #3 \n" // q0 = 3
|
||||
|
||||
"vcgt.s32 q1, q4, #0 \n" // (b0>0)
|
||||
"vqsub.s32 q2, q4, q1 \n" // (b0+(b0>0))
|
||||
"vqadd.s32 q3, q2, q0 \n" // (b0+(b0>0)+3)
|
||||
"vshrn.s32 d18, q3, #3 \n" // (b0+(b0>0)+3) >> 3
|
||||
|
||||
"vcgt.s32 q1, q5, #0 \n" // (b1>0)
|
||||
"vqsub.s32 q2, q5, q1 \n" // (b1+(b1>0))
|
||||
"vqadd.s32 q3, q2, q0 \n" // (b1+(b1>0)+3)
|
||||
"vshrn.s32 d19, q3, #3 \n" // (b1+(b1>0)+3) >> 3
|
||||
|
||||
"vcgt.s32 q1, q6, #0 \n" // (b2>0)
|
||||
"vqsub.s32 q2, q6, q1 \n" // (b2+(b2>0))
|
||||
"vqadd.s32 q3, q2, q0 \n" // (b2+(b2>0)+3)
|
||||
"vshrn.s32 d20, q3, #3 \n" // (b2+(b2>0)+3) >> 3
|
||||
|
||||
"vcgt.s32 q1, q7, #0 \n" // (b3>0)
|
||||
"vqsub.s32 q2, q7, q1 \n" // (b3+(b3>0))
|
||||
"vqadd.s32 q3, q2, q0 \n" // (b3+(b3>0)+3)
|
||||
"vshrn.s32 d21, q3, #3 \n" // (b3+(b3>0)+3) >> 3
|
||||
|
||||
"vst1.16 {q9, q10}, [%[out]] \n" |
||||
|
||||
: [in] "+r"(in) |
||||
: [kStep] "r"(kStep), [out] "r"(out) |
||||
: "memory", "q0", "q1", "q2", "q3", "q4", "q5", |
||||
"q6", "q7", "q8", "q9", "q10" // clobbered
|
||||
) ; |
||||
} |
||||
|
||||
//------------------------------------------------------------------------------
|
||||
// Texture distortion
|
||||
//
|
||||
// We try to match the spectral content (weighted) between source and
|
||||
// reconstructed samples.
|
||||
|
||||
// Hadamard transform
|
||||
// Returns the weighted sum of the absolute value of transformed coefficients.
|
||||
// This uses a TTransform helper function in C
|
||||
static int Disto4x4(const uint8_t* const a, const uint8_t* const b, |
||||
const uint16_t* const w) { |
||||
const int kBPS = BPS; |
||||
const uint8_t* A = a; |
||||
const uint8_t* B = b; |
||||
const uint16_t* W = w; |
||||
int sum; |
||||
__asm__ volatile ( |
||||
"vld1.32 d0[0], [%[a]], %[kBPS] \n" |
||||
"vld1.32 d0[1], [%[a]], %[kBPS] \n" |
||||
"vld1.32 d2[0], [%[a]], %[kBPS] \n" |
||||
"vld1.32 d2[1], [%[a]] \n" |
||||
|
||||
"vld1.32 d1[0], [%[b]], %[kBPS] \n" |
||||
"vld1.32 d1[1], [%[b]], %[kBPS] \n" |
||||
"vld1.32 d3[0], [%[b]], %[kBPS] \n" |
||||
"vld1.32 d3[1], [%[b]] \n" |
||||
|
||||
// a d0/d2, b d1/d3
|
||||
// d0/d1: 01 01 01 01
|
||||
// d2/d3: 23 23 23 23
|
||||
// But: it goes 01 45 23 67
|
||||
// Notice the middle values are transposed
|
||||
"vtrn.16 q0, q1 \n" |
||||
|
||||
// {a0, a1} = {in[0] + in[2], in[1] + in[3]}
|
||||
"vaddl.u8 q2, d0, d2 \n" |
||||
"vaddl.u8 q10, d1, d3 \n" |
||||
// {a3, a2} = {in[0] - in[2], in[1] - in[3]}
|
||||
"vsubl.u8 q3, d0, d2 \n" |
||||
"vsubl.u8 q11, d1, d3 \n" |
||||
|
||||
// tmp[0] = a0 + a1
|
||||
"vpaddl.s16 q0, q2 \n" |
||||
"vpaddl.s16 q8, q10 \n" |
||||
|
||||
// tmp[1] = a3 + a2
|
||||
"vpaddl.s16 q1, q3 \n" |
||||
"vpaddl.s16 q9, q11 \n" |
||||
|
||||
// No pair subtract
|
||||
// q2 = {a0, a3}
|
||||
// q3 = {a1, a2}
|
||||
"vtrn.16 q2, q3 \n" |
||||
"vtrn.16 q10, q11 \n" |
||||
|
||||
// {tmp[3], tmp[2]} = {a0 - a1, a3 - a2}
|
||||
"vsubl.s16 q12, d4, d6 \n" |
||||
"vsubl.s16 q13, d5, d7 \n" |
||||
"vsubl.s16 q14, d20, d22 \n" |
||||
"vsubl.s16 q15, d21, d23 \n" |
||||
|
||||
// separate tmp[3] and tmp[2]
|
||||
// q12 = tmp[3]
|
||||
// q13 = tmp[2]
|
||||
"vtrn.32 q12, q13 \n" |
||||
"vtrn.32 q14, q15 \n" |
||||
|
||||
// Transpose tmp for a
|
||||
"vswp d1, d26 \n" // vtrn.64
|
||||
"vswp d3, d24 \n" // vtrn.64
|
||||
"vtrn.32 q0, q1 \n" |
||||
"vtrn.32 q13, q12 \n" |
||||
|
||||
// Transpose tmp for b
|
||||
"vswp d17, d30 \n" // vtrn.64
|
||||
"vswp d19, d28 \n" // vtrn.64
|
||||
"vtrn.32 q8, q9 \n" |
||||
"vtrn.32 q15, q14 \n" |
||||
|
||||
// The first Q register is a, the second b.
|
||||
// q0/8 tmp[0-3]
|
||||
// q13/15 tmp[4-7]
|
||||
// q1/9 tmp[8-11]
|
||||
// q12/14 tmp[12-15]
|
||||
|
||||
// These are still in 01 45 23 67 order. We fix it easily in the addition
|
||||
// case but the subtraction propagates them.
|
||||
"vswp d3, d27 \n" |
||||
"vswp d19, d31 \n" |
||||
|
||||
// a0 = tmp[0] + tmp[8]
|
||||
"vadd.s32 q2, q0, q1 \n" |
||||
"vadd.s32 q3, q8, q9 \n" |
||||
|
||||
// a1 = tmp[4] + tmp[12]
|
||||
"vadd.s32 q10, q13, q12 \n" |
||||
"vadd.s32 q11, q15, q14 \n" |
||||
|
||||
// a2 = tmp[4] - tmp[12]
|
||||
"vsub.s32 q13, q13, q12 \n" |
||||
"vsub.s32 q15, q15, q14 \n" |
||||
|
||||
// a3 = tmp[0] - tmp[8]
|
||||
"vsub.s32 q0, q0, q1 \n" |
||||
"vsub.s32 q8, q8, q9 \n" |
||||
|
||||
// b0 = a0 + a1
|
||||
"vadd.s32 q1, q2, q10 \n" |
||||
"vadd.s32 q9, q3, q11 \n" |
||||
|
||||
// b1 = a3 + a2
|
||||
"vadd.s32 q12, q0, q13 \n" |
||||
"vadd.s32 q14, q8, q15 \n" |
||||
|
||||
// b2 = a3 - a2
|
||||
"vsub.s32 q0, q0, q13 \n" |
||||
"vsub.s32 q8, q8, q15 \n" |
||||
|
||||
// b3 = a0 - a1
|
||||
"vsub.s32 q2, q2, q10 \n" |
||||
"vsub.s32 q3, q3, q11 \n" |
||||
|
||||
"vld1.64 {q10, q11}, [%[w]] \n" |
||||
|
||||
// abs(b0)
|
||||
"vabs.s32 q1, q1 \n" |
||||
"vabs.s32 q9, q9 \n" |
||||
// abs(b1)
|
||||
"vabs.s32 q12, q12 \n" |
||||
"vabs.s32 q14, q14 \n" |
||||
// abs(b2)
|
||||
"vabs.s32 q0, q0 \n" |
||||
"vabs.s32 q8, q8 \n" |
||||
// abs(b3)
|
||||
"vabs.s32 q2, q2 \n" |
||||
"vabs.s32 q3, q3 \n" |
||||
|
||||
// expand w before using.
|
||||
"vmovl.u16 q13, d20 \n" |
||||
"vmovl.u16 q15, d21 \n" |
||||
|
||||
// w[0] * abs(b0)
|
||||
"vmul.u32 q1, q1, q13 \n" |
||||
"vmul.u32 q9, q9, q13 \n" |
||||
|
||||
// w[4] * abs(b1)
|
||||
"vmla.u32 q1, q12, q15 \n" |
||||
"vmla.u32 q9, q14, q15 \n" |
||||
|
||||
// expand w before using.
|
||||
"vmovl.u16 q13, d22 \n" |
||||
"vmovl.u16 q15, d23 \n" |
||||
|
||||
// w[8] * abs(b1)
|
||||
"vmla.u32 q1, q0, q13 \n" |
||||
"vmla.u32 q9, q8, q13 \n" |
||||
|
||||
// w[12] * abs(b1)
|
||||
"vmla.u32 q1, q2, q15 \n" |
||||
"vmla.u32 q9, q3, q15 \n" |
||||
|
||||
// Sum the arrays
|
||||
"vpaddl.u32 q1, q1 \n" |
||||
"vpaddl.u32 q9, q9 \n" |
||||
"vadd.u64 d2, d3 \n" |
||||
"vadd.u64 d18, d19 \n" |
||||
|
||||
// Hadamard transform needs 4 bits of extra precision (2 bits in each
|
||||
// direction) for dynamic range. Weights w[] are 16bits at max, so the maximum
|
||||
// precision for coeff is 8bit of input + 4bits of Hadamard transform +
|
||||
// 16bits for w[] + 2 bits of abs() summation.
|
||||
//
|
||||
// This uses a maximum of 31 bits (signed). Discarding the top 32 bits is
|
||||
// A-OK.
|
||||
|
||||
// sum2 - sum1
|
||||
"vsub.u32 d0, d2, d18 \n" |
||||
// abs(sum2 - sum1)
|
||||
"vabs.s32 d0, d0 \n" |
||||
// abs(sum2 - sum1) >> 5
|
||||
"vshr.u32 d0, #5 \n" |
||||
|
||||
// It would be better to move the value straight into r0 but I'm not
|
||||
// entirely sure how this works with inline assembly.
|
||||
"vmov.32 %[sum], d0[0] \n" |
||||
|
||||
: [sum] "=r"(sum), [a] "+r"(A), [b] "+r"(B), [w] "+r"(W) |
||||
: [kBPS] "r"(kBPS) |
||||
: "memory", "q0", "q1", "q2", "q3", "q4", "q5", "q6", "q7", "q8", "q9", |
||||
"q10", "q11", "q12", "q13", "q14", "q15" // clobbered
|
||||
) ; |
||||
|
||||
return sum; |
||||
} |
||||
|
||||
static int Disto16x16(const uint8_t* const a, const uint8_t* const b, |
||||
const uint16_t* const w) { |
||||
int D = 0; |
||||
int x, y; |
||||
for (y = 0; y < 16 * BPS; y += 4 * BPS) { |
||||
for (x = 0; x < 16; x += 4) { |
||||
D += Disto4x4(a + x + y, b + x + y, w); |
||||
} |
||||
} |
||||
return D; |
||||
} |
||||
|
||||
#endif // WEBP_USE_NEON
|
||||
|
||||
//------------------------------------------------------------------------------
|
||||
// Entry point
|
||||
|
||||
extern void VP8EncDspInitNEON(void); |
||||
|
||||
void VP8EncDspInitNEON(void) { |
||||
#if defined(WEBP_USE_NEON) |
||||
VP8ITransform = ITransform; |
||||
VP8FTransform = FTransform; |
||||
|
||||
VP8ITransformWHT = ITransformWHT; |
||||
VP8FTransformWHT = FTransformWHT; |
||||
|
||||
VP8TDisto4x4 = Disto4x4; |
||||
VP8TDisto16x16 = Disto16x16; |
||||
#endif // WEBP_USE_NEON
|
||||
} |
||||
|
||||
#if defined(__cplusplus) || defined(c_plusplus) |
||||
} // extern "C"
|
||||
#endif |
@@ -0,0 +1,292 @@
// Copyright 2011 Google Inc. All Rights Reserved.
//
// This code is licensed under the same terms as WebM:
// Software License Agreement: http://www.webmproject.org/license/software/
// Additional IP Rights Grant: http://www.webmproject.org/license/additional/
// -----------------------------------------------------------------------------
//
// NEON version of YUV to RGB upsampling functions.
//
// Author: mans@mansr.com (Mans Rullgard)
// Based on SSE code by: somnath@google.com (Somnath Banerjee)

#include "./dsp.h"

#if defined(__cplusplus) || defined(c_plusplus)
extern "C" {
#endif

#if defined(WEBP_USE_NEON)

#include <assert.h>
#include <arm_neon.h>
#include <string.h>
#include "./yuv.h"

#ifdef FANCY_UPSAMPLING

// Loads 9 pixels each from rows r1 and r2 and generates 16 pixels.
#define UPSAMPLE_16PIXELS(r1, r2, out) {                                \
  uint8x8_t a = vld1_u8(r1);                                            \
  uint8x8_t b = vld1_u8(r1 + 1);                                        \
  uint8x8_t c = vld1_u8(r2);                                            \
  uint8x8_t d = vld1_u8(r2 + 1);                                        \
                                                                        \
  uint16x8_t al = vshll_n_u8(a, 1);                                     \
  uint16x8_t bl = vshll_n_u8(b, 1);                                     \
  uint16x8_t cl = vshll_n_u8(c, 1);                                     \
  uint16x8_t dl = vshll_n_u8(d, 1);                                     \
                                                                        \
  uint8x8_t diag1, diag2;                                               \
  uint16x8_t sl;                                                        \
                                                                        \
  /* a + b + c + d */                                                   \
  sl = vaddl_u8(a, b);                                                  \
  sl = vaddw_u8(sl, c);                                                 \
  sl = vaddw_u8(sl, d);                                                 \
                                                                        \
  al = vaddq_u16(sl, al); /* 3a + b + c + d */                          \
  bl = vaddq_u16(sl, bl); /* a + 3b + c + d */                          \
                                                                        \
  al = vaddq_u16(al, dl); /* 3a + b + c + 3d */                         \
  bl = vaddq_u16(bl, cl); /* a + 3b + 3c + d */                         \
                                                                        \
  diag2 = vshrn_n_u16(al, 3);                                           \
  diag1 = vshrn_n_u16(bl, 3);                                           \
                                                                        \
  a = vrhadd_u8(a, diag1);                                              \
  b = vrhadd_u8(b, diag2);                                              \
  c = vrhadd_u8(c, diag2);                                              \
  d = vrhadd_u8(d, diag1);                                              \
                                                                        \
  {                                                                     \
    const uint8x8x2_t a_b = {{ a, b }};                                 \
    const uint8x8x2_t c_d = {{ c, d }};                                 \
    vst2_u8(out,      a_b);                                             \
    vst2_u8(out + 32, c_d);                                             \
  }                                                                     \
}

// Turn the macro into a function for reducing code-size when non-critical
static void Upsample16Pixels(const uint8_t *r1, const uint8_t *r2,
                             uint8_t *out) {
  UPSAMPLE_16PIXELS(r1, r2, out);
}
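// The macro above amounts to the usual bilinear "fancy upsampling" filter with
// (9, 3, 3, 1)/16 weights, computed as two rounding averages. As a reading aid
// only (this helper is not part of the original file, and its single-step
// rounding can differ from the two-step NEON path in the lowest bit), one 2x2
// output block for neighboring chroma samples a = r1[i], b = r1[i + 1],
// c = r2[i], d = r2[i + 1] would be:
static void UpsampleOnePairSketch(int a, int b, int c, int d,
                                  uint8_t top[2], uint8_t bot[2]) {
  top[0] = (uint8_t)((9 * a + 3 * b + 3 * c +     d + 8) >> 4);  // nearest: a
  top[1] = (uint8_t)((3 * a + 9 * b +     c + 3 * d + 8) >> 4);  // nearest: b
  bot[0] = (uint8_t)((3 * a +     b + 9 * c + 3 * d + 8) >> 4);  // nearest: c
  bot[1] = (uint8_t)((    a + 3 * b + 3 * c + 9 * d + 8) >> 4);  // nearest: d
}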
||||
|
||||
#define UPSAMPLE_LAST_BLOCK(tb, bb, num_pixels, out) { \ |
||||
uint8_t r1[9], r2[9]; \
|
||||
memcpy(r1, (tb), (num_pixels)); \
|
||||
memcpy(r2, (bb), (num_pixels)); \
|
||||
/* replicate last byte */ \
|
||||
memset(r1 + (num_pixels), r1[(num_pixels) - 1], 9 - (num_pixels)); \
|
||||
memset(r2 + (num_pixels), r2[(num_pixels) - 1], 9 - (num_pixels)); \
|
||||
Upsample16Pixels(r1, r2, out); \
|
||||
} |
||||
|
||||
#define CY 76283 |
||||
#define CVR 89858 |
||||
#define CUG 22014 |
||||
#define CVG 45773 |
||||
#define CUB 113618 |
||||
|
||||
static const int16_t coef[4] = { CVR / 4, CUG, CVG / 2, CUB / 4 }; |
||||
|
||||
#define CONVERT8(FMT, XSTEP, N, src_y, src_uv, out, cur_x) { \ |
||||
int i; \
|
||||
for (i = 0; i < N; i += 8) { \
|
||||
int off = ((cur_x) + i) * XSTEP; \
|
||||
uint8x8_t y = vld1_u8(src_y + (cur_x) + i); \
|
||||
uint8x8_t u = vld1_u8((src_uv) + i); \
|
||||
uint8x8_t v = vld1_u8((src_uv) + i + 16); \
|
||||
int16x8_t yy = vreinterpretq_s16_u16(vsubl_u8(y, u16)); \
|
||||
int16x8_t uu = vreinterpretq_s16_u16(vsubl_u8(u, u128)); \
|
||||
int16x8_t vv = vreinterpretq_s16_u16(vsubl_u8(v, u128)); \
|
||||
\
|
||||
int16x8_t ud = vshlq_n_s16(uu, 1); \
|
||||
int16x8_t vd = vshlq_n_s16(vv, 1); \
|
||||
\
|
||||
int32x4_t vrl = vqdmlal_lane_s16(vshll_n_s16(vget_low_s16(vv), 1), \
|
||||
vget_low_s16(vd), cf16, 0); \
|
||||
int32x4_t vrh = vqdmlal_lane_s16(vshll_n_s16(vget_high_s16(vv), 1), \
|
||||
vget_high_s16(vd), cf16, 0); \
|
||||
int16x8_t vr = vcombine_s16(vrshrn_n_s32(vrl, 16), \
|
||||
vrshrn_n_s32(vrh, 16)); \
|
||||
\
|
||||
int32x4_t vl = vmovl_s16(vget_low_s16(vv)); \
|
||||
int32x4_t vh = vmovl_s16(vget_high_s16(vv)); \
|
||||
int32x4_t ugl = vmlal_lane_s16(vl, vget_low_s16(uu), cf16, 1); \
|
||||
int32x4_t ugh = vmlal_lane_s16(vh, vget_high_s16(uu), cf16, 1); \
|
||||
int32x4_t gcl = vqdmlal_lane_s16(ugl, vget_low_s16(vv), cf16, 2); \
|
||||
int32x4_t gch = vqdmlal_lane_s16(ugh, vget_high_s16(vv), cf16, 2); \
|
||||
int16x8_t gc = vcombine_s16(vrshrn_n_s32(gcl, 16), \
|
||||
vrshrn_n_s32(gch, 16)); \
|
||||
\
|
||||
int32x4_t ubl = vqdmlal_lane_s16(vshll_n_s16(vget_low_s16(uu), 1), \
|
||||
vget_low_s16(ud), cf16, 3); \
|
||||
int32x4_t ubh = vqdmlal_lane_s16(vshll_n_s16(vget_high_s16(uu), 1), \
|
||||
vget_high_s16(ud), cf16, 3); \
|
||||
int16x8_t ub = vcombine_s16(vrshrn_n_s32(ubl, 16), \
|
||||
vrshrn_n_s32(ubh, 16)); \
|
||||
\
|
||||
int32x4_t rl = vaddl_s16(vget_low_s16(yy), vget_low_s16(vr)); \
|
||||
int32x4_t rh = vaddl_s16(vget_high_s16(yy), vget_high_s16(vr)); \
|
||||
int32x4_t gl = vsubl_s16(vget_low_s16(yy), vget_low_s16(gc)); \
|
||||
int32x4_t gh = vsubl_s16(vget_high_s16(yy), vget_high_s16(gc)); \
|
||||
int32x4_t bl = vaddl_s16(vget_low_s16(yy), vget_low_s16(ub)); \
|
||||
int32x4_t bh = vaddl_s16(vget_high_s16(yy), vget_high_s16(ub)); \
|
||||
\
|
||||
rl = vmulq_lane_s32(rl, cf32, 0); \
|
||||
rh = vmulq_lane_s32(rh, cf32, 0); \
|
||||
gl = vmulq_lane_s32(gl, cf32, 0); \
|
||||
gh = vmulq_lane_s32(gh, cf32, 0); \
|
||||
bl = vmulq_lane_s32(bl, cf32, 0); \
|
||||
bh = vmulq_lane_s32(bh, cf32, 0); \
|
||||
\
|
||||
y = vqmovun_s16(vcombine_s16(vrshrn_n_s32(rl, 16), \
|
||||
vrshrn_n_s32(rh, 16))); \
|
||||
u = vqmovun_s16(vcombine_s16(vrshrn_n_s32(gl, 16), \
|
||||
vrshrn_n_s32(gh, 16))); \
|
||||
v = vqmovun_s16(vcombine_s16(vrshrn_n_s32(bl, 16), \
|
||||
vrshrn_n_s32(bh, 16))); \
|
||||
STR_ ## FMT(out + off, y, u, v); \
|
||||
} \
|
||||
} |
||||
|
||||
#define v255 vmov_n_u8(255) |
||||
|
||||
#define STR_Rgb(out, r, g, b) do { \ |
||||
const uint8x8x3_t r_g_b = {{ r, g, b }}; \
|
||||
vst3_u8(out, r_g_b); \
|
||||
} while (0) |
||||
|
||||
#define STR_Bgr(out, r, g, b) do { \ |
||||
const uint8x8x3_t b_g_r = {{ b, g, r }}; \
|
||||
vst3_u8(out, b_g_r); \
|
||||
} while (0) |
||||
|
||||
#define STR_Rgba(out, r, g, b) do { \ |
||||
const uint8x8x4_t r_g_b_v255 = {{ r, g, b, v255 }}; \
|
||||
vst4_u8(out, r_g_b_v255); \
|
||||
} while (0) |
||||
|
||||
#define STR_Bgra(out, r, g, b) do { \ |
||||
const uint8x8x4_t b_g_r_v255 = {{ b, g, r, v255 }}; \
|
||||
vst4_u8(out, b_g_r_v255); \
|
||||
} while (0) |
||||
|
||||
#define CONVERT1(FMT, XSTEP, N, src_y, src_uv, rgb, cur_x) { \ |
||||
int i; \
|
||||
for (i = 0; i < N; i++) { \
|
||||
int off = ((cur_x) + i) * XSTEP; \
|
||||
int y = src_y[(cur_x) + i]; \
|
||||
int u = (src_uv)[i]; \
|
||||
int v = (src_uv)[i + 16]; \
|
||||
VP8YuvTo ## FMT(y, u, v, rgb + off); \
|
||||
} \
|
||||
} |
||||
|
||||
#define CONVERT2RGB_8(FMT, XSTEP, top_y, bottom_y, uv, \ |
||||
top_dst, bottom_dst, cur_x, len) { \
|
||||
if (top_y) { \
|
||||
CONVERT8(FMT, XSTEP, len, top_y, uv, top_dst, cur_x) \
|
||||
} \
|
||||
if (bottom_y) { \
|
||||
CONVERT8(FMT, XSTEP, len, bottom_y, (uv) + 32, bottom_dst, cur_x) \
|
||||
} \
|
||||
} |
||||
|
||||
#define CONVERT2RGB_1(FMT, XSTEP, top_y, bottom_y, uv, \ |
||||
top_dst, bottom_dst, cur_x, len) { \
|
||||
if (top_y) { \
|
||||
CONVERT1(FMT, XSTEP, len, top_y, uv, top_dst, cur_x); \
|
||||
} \
|
||||
if (bottom_y) { \
|
||||
CONVERT1(FMT, XSTEP, len, bottom_y, (uv) + 32, bottom_dst, cur_x); \
|
||||
} \
|
||||
} |
||||
|
||||
#define NEON_UPSAMPLE_FUNC(FUNC_NAME, FMT, XSTEP) \ |
||||
static void FUNC_NAME(const uint8_t *top_y, const uint8_t *bottom_y, \
|
||||
const uint8_t *top_u, const uint8_t *top_v, \
|
||||
const uint8_t *cur_u, const uint8_t *cur_v, \
|
||||
uint8_t *top_dst, uint8_t *bottom_dst, int len) { \
|
||||
int block; \
|
||||
/* 16 byte aligned array to cache reconstructed u and v */ \
|
||||
uint8_t uv_buf[2 * 32 + 15]; \
|
||||
uint8_t *const r_uv = (uint8_t*)((uintptr_t)(uv_buf + 15) & ~15); \
|
||||
const int uv_len = (len + 1) >> 1; \
|
||||
/* 9 pixels must be read-able for each block */ \
|
||||
const int num_blocks = (uv_len - 1) >> 3; \
|
||||
const int leftover = uv_len - num_blocks * 8; \
|
||||
const int last_pos = 1 + 16 * num_blocks; \
|
||||
\
|
||||
const int u_diag = ((top_u[0] + cur_u[0]) >> 1) + 1; \
|
||||
const int v_diag = ((top_v[0] + cur_v[0]) >> 1) + 1; \
|
||||
\
|
||||
const int16x4_t cf16 = vld1_s16(coef); \
|
||||
const int32x2_t cf32 = vmov_n_s32(CY); \
|
||||
const uint8x8_t u16 = vmov_n_u8(16); \
|
||||
const uint8x8_t u128 = vmov_n_u8(128); \
|
||||
\
|
||||
/* Treat the first pixel in regular way */ \
|
||||
if (top_y) { \
|
||||
const int u0 = (top_u[0] + u_diag) >> 1; \
|
||||
const int v0 = (top_v[0] + v_diag) >> 1; \
|
||||
VP8YuvTo ## FMT(top_y[0], u0, v0, top_dst); \
|
||||
} \
|
||||
if (bottom_y) { \
|
||||
const int u0 = (cur_u[0] + u_diag) >> 1; \
|
||||
const int v0 = (cur_v[0] + v_diag) >> 1; \
|
||||
VP8YuvTo ## FMT(bottom_y[0], u0, v0, bottom_dst); \
|
||||
} \
|
||||
\
|
||||
for (block = 0; block < num_blocks; ++block) { \
|
||||
UPSAMPLE_16PIXELS(top_u, cur_u, r_uv); \
|
||||
UPSAMPLE_16PIXELS(top_v, cur_v, r_uv + 16); \
|
||||
CONVERT2RGB_8(FMT, XSTEP, top_y, bottom_y, r_uv, \
|
||||
top_dst, bottom_dst, 16 * block + 1, 16); \
|
||||
top_u += 8; \
|
||||
cur_u += 8; \
|
||||
top_v += 8; \
|
||||
cur_v += 8; \
|
||||
} \
|
||||
\
|
||||
UPSAMPLE_LAST_BLOCK(top_u, cur_u, leftover, r_uv); \
|
||||
UPSAMPLE_LAST_BLOCK(top_v, cur_v, leftover, r_uv + 16); \
|
||||
CONVERT2RGB_1(FMT, XSTEP, top_y, bottom_y, r_uv, \
|
||||
top_dst, bottom_dst, last_pos, len - last_pos); \
|
||||
} |
||||
|
||||
// NEON variants of the fancy upsampler.
|
||||
NEON_UPSAMPLE_FUNC(UpsampleRgbLinePairNEON, Rgb, 3) |
||||
NEON_UPSAMPLE_FUNC(UpsampleBgrLinePairNEON, Bgr, 3) |
||||
NEON_UPSAMPLE_FUNC(UpsampleRgbaLinePairNEON, Rgba, 4) |
||||
NEON_UPSAMPLE_FUNC(UpsampleBgraLinePairNEON, Bgra, 4) |
||||
|
||||
#endif // FANCY_UPSAMPLING
|
||||
|
||||
#endif // WEBP_USE_NEON
|
||||
|
||||
//------------------------------------------------------------------------------
|
||||
|
||||
extern WebPUpsampleLinePairFunc WebPUpsamplers[/* MODE_LAST */]; |
||||
|
||||
void WebPInitUpsamplersNEON(void) { |
||||
#if defined(WEBP_USE_NEON) |
||||
WebPUpsamplers[MODE_RGB] = UpsampleRgbLinePairNEON; |
||||
WebPUpsamplers[MODE_RGBA] = UpsampleRgbaLinePairNEON; |
||||
WebPUpsamplers[MODE_BGR] = UpsampleBgrLinePairNEON; |
||||
WebPUpsamplers[MODE_BGRA] = UpsampleBgraLinePairNEON; |
||||
#endif // WEBP_USE_NEON
|
||||
} |
||||
|
||||
void WebPInitPremultiplyNEON(void) { |
||||
#if defined(WEBP_USE_NEON) |
||||
WebPUpsamplers[MODE_rgbA] = UpsampleRgbaLinePairNEON; |
||||
WebPUpsamplers[MODE_bgrA] = UpsampleBgraLinePairNEON; |
||||
#endif // WEBP_USE_NEON
|
||||
} |
||||
|
||||
#if defined(__cplusplus) || defined(c_plusplus) |
||||
} // extern "C"
|
||||
#endif |
@@ -0,0 +1,254 @@
// Copyright 2011 Google Inc. All Rights Reserved.
//
// This code is licensed under the same terms as WebM:
// Software License Agreement: http://www.webmproject.org/license/software/
// Additional IP Rights Grant: http://www.webmproject.org/license/additional/
// -----------------------------------------------------------------------------
//
// Paginated token buffer
//
// A 'token' is a bit value associated with a probability, either fixed
// or determined later, after statistics have been collected.
// For dynamic probabilities, we just record the slot id (idx) of the
// probability value in the final probability array (uint8_t* probas in
// VP8EmitTokens).
//
// Author: Skal (pascal.massimino@gmail.com)

#include <assert.h>
#include <stdlib.h>
#include <string.h>

#include "./vp8enci.h"

#if defined(__cplusplus) || defined(c_plusplus)
extern "C" {
#endif

#if !defined(DISABLE_TOKEN_BUFFER)

// we use pages to reduce the number of memcpy()
#define MAX_NUM_TOKEN 8192          // max number of tokens per page
#define FIXED_PROBA_BIT (1u << 14)

struct VP8Tokens {
  uint16_t tokens_[MAX_NUM_TOKEN];  // bit #15: bit value
                                    // bit #14: constant proba or idx
                                    // bits #0..13: slot index or constant proba
  VP8Tokens* next_;
};
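// Illustration only (not part of the original file): how a single token word
// is packed and later consumed, following the field layout documented above.
// The helper name is hypothetical.
static WEBP_INLINE uint16_t PackTokenSketch(int bit, int use_fixed_proba,
                                            uint32_t slot_or_proba) {
  // bit -> bit #15, fixed-proba flag -> bit #14, payload -> bits #0..13
  return (uint16_t)((bit << 15) |
                    (use_fixed_proba ? FIXED_PROBA_BIT : 0) |
                    slot_or_proba);
}
// When emitting (see VP8EmitTokens below), the word is unpacked as:
//   bit   = (token >> 15) & 1;
//   if (token & FIXED_PROBA_BIT) proba = token & 0xffu;       // constant proba
//   else                         proba = probas[token & 0x3fffu];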
||||
|
||||
//------------------------------------------------------------------------------
|
||||
|
||||
void VP8TBufferInit(VP8TBuffer* const b) { |
||||
b->tokens_ = NULL; |
||||
b->pages_ = NULL; |
||||
b->last_page_ = &b->pages_; |
||||
b->left_ = 0; |
||||
b->error_ = 0; |
||||
} |
||||
|
||||
void VP8TBufferClear(VP8TBuffer* const b) { |
||||
if (b != NULL) { |
||||
const VP8Tokens* p = b->pages_; |
||||
while (p != NULL) { |
||||
const VP8Tokens* const next = p->next_; |
||||
free((void*)p); |
||||
p = next; |
||||
} |
||||
VP8TBufferInit(b); |
||||
} |
||||
} |
||||
|
||||
static int TBufferNewPage(VP8TBuffer* const b) { |
||||
VP8Tokens* const page = b->error_ ? NULL : (VP8Tokens*)malloc(sizeof(*page)); |
||||
if (page == NULL) { |
||||
b->error_ = 1; |
||||
return 0; |
||||
} |
||||
*b->last_page_ = page; |
||||
b->last_page_ = &page->next_; |
||||
b->left_ = MAX_NUM_TOKEN; |
||||
b->tokens_ = page->tokens_; |
||||
page->next_ = NULL; |
||||
return 1; |
||||
} |
||||
|
||||
//------------------------------------------------------------------------------
|
||||
|
||||
#define TOKEN_ID(t, b, ctx, p) \ |
||||
((p) + NUM_PROBAS * ((ctx) + NUM_CTX * ((b) + NUM_BANDS * (t)))) |
||||
|
||||
static WEBP_INLINE int AddToken(VP8TBuffer* const b, |
||||
int bit, uint32_t proba_idx) { |
||||
assert(proba_idx < FIXED_PROBA_BIT); |
||||
assert(bit == 0 || bit == 1); |
||||
if (b->left_ > 0 || TBufferNewPage(b)) { |
||||
const int slot = --b->left_; |
||||
b->tokens_[slot] = (bit << 15) | proba_idx; |
||||
} |
||||
return bit; |
||||
} |
||||
|
||||
static WEBP_INLINE void AddConstantToken(VP8TBuffer* const b, |
||||
int bit, int proba) { |
||||
assert(proba < 256); |
||||
assert(bit == 0 || bit == 1); |
||||
if (b->left_ > 0 || TBufferNewPage(b)) { |
||||
const int slot = --b->left_; |
||||
b->tokens_[slot] = (bit << 15) | FIXED_PROBA_BIT | proba; |
||||
} |
||||
} |
||||
|
||||
int VP8RecordCoeffTokens(int ctx, int coeff_type, int first, int last, |
||||
const int16_t* const coeffs, |
||||
VP8TBuffer* const tokens) { |
||||
int n = first; |
||||
uint32_t base_id = TOKEN_ID(coeff_type, n, ctx, 0); |
||||
if (!AddToken(tokens, last >= 0, base_id + 0)) { |
||||
return 0; |
||||
} |
||||
|
||||
while (n < 16) { |
||||
const int c = coeffs[n++]; |
||||
const int sign = c < 0; |
||||
int v = sign ? -c : c; |
||||
if (!AddToken(tokens, v != 0, base_id + 1)) { |
||||
ctx = 0; |
||||
base_id = TOKEN_ID(coeff_type, VP8EncBands[n], ctx, 0); |
||||
continue; |
||||
} |
||||
if (!AddToken(tokens, v > 1, base_id + 2)) { |
||||
ctx = 1; |
||||
} else { |
||||
if (!AddToken(tokens, v > 4, base_id + 3)) { |
||||
if (AddToken(tokens, v != 2, base_id + 4)) |
||||
AddToken(tokens, v == 4, base_id + 5); |
||||
} else if (!AddToken(tokens, v > 10, base_id + 6)) { |
||||
if (!AddToken(tokens, v > 6, base_id + 7)) { |
||||
AddConstantToken(tokens, v == 6, 159); |
||||
} else { |
||||
AddConstantToken(tokens, v >= 9, 165); |
||||
AddConstantToken(tokens, !(v & 1), 145); |
||||
} |
||||
} else { |
||||
int mask; |
||||
const uint8_t* tab; |
||||
if (v < 3 + (8 << 1)) { // VP8Cat3 (3b)
|
||||
AddToken(tokens, 0, base_id + 8); |
||||
AddToken(tokens, 0, base_id + 9); |
||||
v -= 3 + (8 << 0); |
||||
mask = 1 << 2; |
||||
tab = VP8Cat3; |
||||
} else if (v < 3 + (8 << 2)) { // VP8Cat4 (4b)
|
||||
AddToken(tokens, 0, base_id + 8); |
||||
AddToken(tokens, 1, base_id + 9); |
||||
v -= 3 + (8 << 1); |
||||
mask = 1 << 3; |
||||
tab = VP8Cat4; |
||||
} else if (v < 3 + (8 << 3)) { // VP8Cat5 (5b)
|
||||
AddToken(tokens, 1, base_id + 8); |
||||
AddToken(tokens, 0, base_id + 10); |
||||
v -= 3 + (8 << 2); |
||||
mask = 1 << 4; |
||||
tab = VP8Cat5; |
||||
} else { // VP8Cat6 (11b)
|
||||
AddToken(tokens, 1, base_id + 8); |
||||
AddToken(tokens, 1, base_id + 10); |
||||
v -= 3 + (8 << 3); |
||||
mask = 1 << 10; |
||||
tab = VP8Cat6; |
||||
} |
||||
while (mask) { |
||||
AddConstantToken(tokens, !!(v & mask), *tab++); |
||||
mask >>= 1; |
||||
} |
||||
} |
||||
ctx = 2; |
||||
} |
||||
AddConstantToken(tokens, sign, 128); |
||||
base_id = TOKEN_ID(coeff_type, VP8EncBands[n], ctx, 0); |
||||
if (n == 16 || !AddToken(tokens, n <= last, base_id + 0)) { |
||||
return 1; // EOB
|
||||
} |
||||
} |
||||
return 1; |
||||
} |
||||
|
||||
#undef TOKEN_ID |
||||
|
||||
//------------------------------------------------------------------------------
|
||||
// This function works, but isn't currently used. Saved for later.
|
||||
|
||||
#if 0 |
||||
|
||||
static void Record(int bit, proba_t* const stats) { |
||||
proba_t p = *stats; |
||||
if (p >= 0xffff0000u) { // an overflow is inbound.
|
||||
p = ((p + 1u) >> 1) & 0x7fff7fffu; // -> divide the stats by 2.
|
||||
} |
||||
// record bit count (lower 16 bits) and increment total count (upper 16 bits).
|
||||
p += 0x00010000u + bit; |
||||
*stats = p; |
||||
} |
||||
|
||||
void VP8TokenToStats(const VP8TBuffer* const b, proba_t* const stats) { |
||||
const VP8Tokens* p = b->pages_; |
||||
while (p != NULL) { |
||||
const int N = (p->next_ == NULL) ? b->left_ : 0; |
||||
int n = MAX_NUM_TOKEN; |
||||
while (n-- > N) { |
||||
const uint16_t token = p->tokens_[n]; |
||||
if (!(token & FIXED_PROBA_BIT)) { |
||||
Record((token >> 15) & 1, stats + (token & 0x3fffu)); |
||||
} |
||||
} |
||||
p = p->next_; |
||||
} |
||||
} |
||||
|
||||
#endif // 0
|
||||
|
||||
//------------------------------------------------------------------------------
|
||||
// Final coding pass, with known probabilities
|
||||
|
||||
int VP8EmitTokens(VP8TBuffer* const b, VP8BitWriter* const bw, |
||||
const uint8_t* const probas, int final_pass) { |
||||
const VP8Tokens* p = b->pages_; |
||||
(void)final_pass; |
||||
if (b->error_) return 0; |
||||
while (p != NULL) { |
||||
const VP8Tokens* const next = p->next_; |
||||
const int N = (next == NULL) ? b->left_ : 0; |
||||
int n = MAX_NUM_TOKEN; |
||||
while (n-- > N) { |
||||
const uint16_t token = p->tokens_[n]; |
||||
const int bit = (token >> 15) & 1; |
||||
if (token & FIXED_PROBA_BIT) { |
||||
VP8PutBit(bw, bit, token & 0xffu); // constant proba
|
||||
} else { |
||||
VP8PutBit(bw, bit, probas[token & 0x3fffu]); |
||||
} |
||||
} |
||||
if (final_pass) free((void*)p); |
||||
p = next; |
||||
} |
||||
if (final_pass) b->pages_ = NULL; |
||||
return 1; |
||||
} |
||||
|
||||
//------------------------------------------------------------------------------
|
||||
|
||||
#else // DISABLE_TOKEN_BUFFER
|
||||
|
||||
void VP8TBufferInit(VP8TBuffer* const b) { |
||||
(void)b; |
||||
} |
||||
void VP8TBufferClear(VP8TBuffer* const b) { |
||||
(void)b; |
||||
} |
||||
|
||||
#endif // !DISABLE_TOKEN_BUFFER
|
||||
|
||||
#if defined(__cplusplus) || defined(c_plusplus) |
||||
} // extern "C"
|
||||
#endif |
@@ -0,0 +1,28 @@
// Copyright 2013 Google Inc. All Rights Reserved.
//
// This code is licensed under the same terms as WebM:
// Software License Agreement: http://www.webmproject.org/license/software/
// Additional IP Rights Grant: http://www.webmproject.org/license/additional/
// -----------------------------------------------------------------------------
//
// TODO(skal): implement gradient smoothing.
//
// Author: Skal (pascal.massimino@gmail.com)

#include "./quant_levels_dec.h"

#if defined(__cplusplus) || defined(c_plusplus)
extern "C" {
#endif

int DequantizeLevels(uint8_t* const data, int width, int height) {
  if (data == NULL || width <= 0 || height <= 0) return 0;
  (void)data;
  (void)width;
  (void)height;
  return 1;
}

#if defined(__cplusplus) || defined(c_plusplus)
}    // extern "C"
#endif
@@ -0,0 +1,30 @@
// Copyright 2013 Google Inc. All Rights Reserved.
//
// This code is licensed under the same terms as WebM:
// Software License Agreement: http://www.webmproject.org/license/software/
// Additional IP Rights Grant: http://www.webmproject.org/license/additional/
// -----------------------------------------------------------------------------
//
// Alpha plane de-quantization utility
//
// Author: Vikas Arora (vikasa@google.com)

#ifndef WEBP_UTILS_QUANT_LEVELS_DEC_H_
#define WEBP_UTILS_QUANT_LEVELS_DEC_H_

#include "../webp/types.h"

#if defined(__cplusplus) || defined(c_plusplus)
extern "C" {
#endif

// Apply post-processing to input 'data' of size 'width'x'height' assuming
// that the source was quantized to a reduced number of levels.
// Returns false in case of error (data is NULL, invalid parameters, ...).
int DequantizeLevels(uint8_t* const data, int width, int height);

#if defined(__cplusplus) || defined(c_plusplus)
}    // extern "C"
#endif

#endif  /* WEBP_UTILS_QUANT_LEVELS_DEC_H_ */
@@ -0,0 +1,212 @@
// Copyright 2012 Google Inc. All Rights Reserved.
//
// This code is licensed under the same terms as WebM:
// Software License Agreement: http://www.webmproject.org/license/software/
// Additional IP Rights Grant: http://www.webmproject.org/license/additional/
// -----------------------------------------------------------------------------
//
// Demux API.
// Enables extraction of image and extended format data from WebP files.

// Code Example: Demuxing WebP data to extract all the frames, ICC profile
// and EXIF/XMP metadata.
//
//   WebPDemuxer* demux = WebPDemux(&webp_data);
//
//   uint32_t width = WebPDemuxGetI(demux, WEBP_FF_CANVAS_WIDTH);
//   uint32_t height = WebPDemuxGetI(demux, WEBP_FF_CANVAS_HEIGHT);
//   // ... (Get information about the features present in the WebP file).
//   uint32_t flags = WebPDemuxGetI(demux, WEBP_FF_FORMAT_FLAGS);
//
//   // ... (Iterate over all frames).
//   WebPIterator iter;
//   if (WebPDemuxGetFrame(demux, 1, &iter)) {
//     do {
//       // ... (Consume 'iter'; e.g. Decode 'iter.fragment' with WebPDecode(),
//       // ... and get other frame properties like width, height, offsets etc.
//       // ... see 'struct WebPIterator' below for more info).
//     } while (WebPDemuxNextFrame(&iter));
//     WebPDemuxReleaseIterator(&iter);
//   }
//
//   // ... (Extract metadata).
//   WebPChunkIterator chunk_iter;
//   if (flags & ICCP_FLAG) WebPDemuxGetChunk(demux, "ICCP", 1, &chunk_iter);
//   // ... (Consume the ICC profile in 'chunk_iter.chunk').
//   WebPDemuxReleaseChunkIterator(&chunk_iter);
//   if (flags & EXIF_FLAG) WebPDemuxGetChunk(demux, "EXIF", 1, &chunk_iter);
//   // ... (Consume the EXIF metadata in 'chunk_iter.chunk').
//   WebPDemuxReleaseChunkIterator(&chunk_iter);
//   if (flags & XMP_FLAG) WebPDemuxGetChunk(demux, "XMP ", 1, &chunk_iter);
//   // ... (Consume the XMP metadata in 'chunk_iter.chunk').
//   WebPDemuxReleaseChunkIterator(&chunk_iter);
//   WebPDemuxDelete(demux);

#ifndef WEBP_WEBP_DEMUX_H_
#define WEBP_WEBP_DEMUX_H_

#include "./mux_types.h"

#if defined(__cplusplus) || defined(c_plusplus)
extern "C" {
#endif

#define WEBP_DEMUX_ABI_VERSION 0x0100    // MAJOR(8b) + MINOR(8b)

typedef struct WebPDemuxer WebPDemuxer;
#if !(defined(__cplusplus) || defined(c_plusplus))
typedef enum WebPDemuxState WebPDemuxState;
typedef enum WebPFormatFeature WebPFormatFeature;
#endif
typedef struct WebPIterator WebPIterator;
typedef struct WebPChunkIterator WebPChunkIterator;

//------------------------------------------------------------------------------
|
||||
|
||||
// Returns the version number of the demux library, packed in hexadecimal using
|
||||
// 8bits for each of major/minor/revision. E.g: v2.5.7 is 0x020507.
|
||||
WEBP_EXTERN(int) WebPGetDemuxVersion(void); |
||||
|
||||
//------------------------------------------------------------------------------
|
||||
// Life of a Demux object
|
||||
|
||||
enum WebPDemuxState { |
||||
WEBP_DEMUX_PARSING_HEADER, // Not enough data to parse full header.
|
||||
WEBP_DEMUX_PARSED_HEADER, // Header parsing complete, data may be available.
|
||||
WEBP_DEMUX_DONE // Entire file has been parsed.
|
||||
}; |
||||
|
||||
// Internal, version-checked, entry point
|
||||
WEBP_EXTERN(WebPDemuxer*) WebPDemuxInternal( |
||||
const WebPData*, int, WebPDemuxState*, int); |
||||
|
||||
// Parses the full WebP file given by 'data'.
|
||||
// Returns a WebPDemuxer object on successful parse, NULL otherwise.
|
||||
static WEBP_INLINE WebPDemuxer* WebPDemux(const WebPData* data) { |
||||
return WebPDemuxInternal(data, 0, NULL, WEBP_DEMUX_ABI_VERSION); |
||||
} |
||||
|
||||
// Parses the possibly incomplete WebP file given by 'data'.
|
||||
// If 'state' is non-NULL it will be set to indicate the status of the demuxer.
|
||||
// Returns a WebPDemuxer object on successful parse, NULL otherwise.
|
||||
static WEBP_INLINE WebPDemuxer* WebPDemuxPartial( |
||||
const WebPData* data, WebPDemuxState* state) { |
||||
return WebPDemuxInternal(data, 1, state, WEBP_DEMUX_ABI_VERSION); |
||||
} |
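// Illustrative sketch (not from the original header) of the partial-parse
// entry point above, e.g. while bytes are still arriving over the network;
// 'webp_data' is assumed to hold whatever prefix has been received so far.
//
//   WebPDemuxState state;
//   WebPDemuxer* const demux = WebPDemuxPartial(&webp_data, &state);
//   if (demux != NULL) {
//     if (state > WEBP_DEMUX_PARSING_HEADER) {
//       // Canvas size and the number of frames seen so far are available.
//       const uint32_t w = WebPDemuxGetI(demux, WEBP_FF_CANVAS_WIDTH);
//       const uint32_t n = WebPDemuxGetI(demux, WEBP_FF_FRAME_COUNT);
//     }
//     WebPDemuxDelete(demux);   // re-create once more data has arrived
//   }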
||||
|
||||
// Frees memory associated with 'dmux'.
|
||||
WEBP_EXTERN(void) WebPDemuxDelete(WebPDemuxer* dmux); |
||||
|
||||
//------------------------------------------------------------------------------
|
||||
// Data/information extraction.
|
||||
|
||||
enum WebPFormatFeature { |
||||
WEBP_FF_FORMAT_FLAGS, // Extended format flags present in the 'VP8X' chunk.
|
||||
WEBP_FF_CANVAS_WIDTH, |
||||
WEBP_FF_CANVAS_HEIGHT, |
||||
WEBP_FF_LOOP_COUNT, |
||||
WEBP_FF_BACKGROUND_COLOR, |
||||
WEBP_FF_FRAME_COUNT // Number of frames present in the demux object.
|
||||
// In case of a partial demux, this is the number of
|
||||
// frames seen so far, with the last frame possibly
|
||||
// being partial.
|
||||
}; |
||||
|
||||
// Get the 'feature' value from the 'dmux'.
|
||||
// NOTE: values are only valid if WebPDemux() was used or WebPDemuxPartial()
|
||||
// returned a state > WEBP_DEMUX_PARSING_HEADER.
|
||||
WEBP_EXTERN(uint32_t) WebPDemuxGetI( |
||||
const WebPDemuxer* dmux, WebPFormatFeature feature); |
||||
|
||||
//------------------------------------------------------------------------------
|
||||
// Frame iteration.
|
||||
|
||||
struct WebPIterator { |
||||
int frame_num; |
||||
int num_frames; // equivalent to WEBP_FF_FRAME_COUNT.
|
||||
int fragment_num; |
||||
int num_fragments; |
||||
int x_offset, y_offset; // offset relative to the canvas.
|
||||
int width, height; // dimensions of this frame or fragment.
|
||||
int duration; // display duration in milliseconds.
|
||||
WebPMuxAnimDispose dispose_method; // dispose method for the frame.
|
||||
int complete; // true if 'fragment' contains a full frame. partial images
|
||||
// may still be decoded with the WebP incremental decoder.
|
||||
WebPData fragment; // The frame or fragment given by 'frame_num' and
|
||||
// 'fragment_num'.
|
||||
|
||||
uint32_t pad[4]; // padding for later use.
|
||||
void* private_; // for internal use only.
|
||||
}; |
||||
|
||||
// Retrieves frame 'frame_number' from 'dmux'.
|
||||
// 'iter->fragment' points to the first fragment on return from this function.
|
||||
// Individual fragments may be extracted using WebPDemuxSetFragment().
|
||||
// Setting 'frame_number' equal to 0 will return the last frame of the image.
|
||||
// Returns false if 'dmux' is NULL or frame 'frame_number' is not present.
|
||||
// Call WebPDemuxReleaseIterator() when use of the iterator is complete.
|
||||
// NOTE: 'dmux' must persist for the lifetime of 'iter'.
|
||||
WEBP_EXTERN(int) WebPDemuxGetFrame( |
||||
const WebPDemuxer* dmux, int frame_number, WebPIterator* iter); |
||||
|
||||
// Sets 'iter->fragment' to point to the next ('iter->frame_num' + 1) or
|
||||
// previous ('iter->frame_num' - 1) frame. These functions do not loop.
|
||||
// Returns true on success, false otherwise.
|
||||
WEBP_EXTERN(int) WebPDemuxNextFrame(WebPIterator* iter); |
||||
WEBP_EXTERN(int) WebPDemuxPrevFrame(WebPIterator* iter); |
||||
|
||||
// Sets 'iter->fragment' to reflect fragment number 'fragment_num'.
|
||||
// Returns true if fragment 'fragment_num' is present, false otherwise.
|
||||
WEBP_EXTERN(int) WebPDemuxSelectFragment(WebPIterator* iter, int fragment_num); |
||||
|
||||
// Releases any memory associated with 'iter'.
|
||||
// Must be called before any subsequent calls to WebPDemuxGetChunk() on the same
|
||||
// iter. Also, must be called before destroying the associated WebPDemuxer with
|
||||
// WebPDemuxDelete().
|
||||
WEBP_EXTERN(void) WebPDemuxReleaseIterator(WebPIterator* iter); |
||||
|
||||
//------------------------------------------------------------------------------
|
||||
// Chunk iteration.
|
||||
|
||||
struct WebPChunkIterator { |
||||
// The current and total number of chunks with the fourcc given to
|
||||
// WebPDemuxGetChunk().
|
||||
int chunk_num; |
||||
int num_chunks; |
||||
WebPData chunk; // The payload of the chunk.
|
||||
|
||||
uint32_t pad[6]; // padding for later use
|
||||
void* private_; |
||||
}; |
||||
|
||||
// Retrieves the 'chunk_number' instance of the chunk with id 'fourcc' from
|
||||
// 'dmux'.
|
||||
// 'fourcc' is a character array containing the fourcc of the chunk to return,
|
||||
// e.g., "ICCP", "XMP ", "EXIF", etc.
|
||||
// Setting 'chunk_number' equal to 0 will return the last chunk in a set.
|
||||
// Returns true if the chunk is found, false otherwise. Image related chunk
|
||||
// payloads are accessed through WebPDemuxGetFrame() and related functions.
|
||||
// Call WebPDemuxReleaseChunkIterator() when use of the iterator is complete.
|
||||
// NOTE: 'dmux' must persist for the lifetime of the iterator.
|
||||
WEBP_EXTERN(int) WebPDemuxGetChunk(const WebPDemuxer* dmux, |
||||
const char fourcc[4], int chunk_number, |
||||
WebPChunkIterator* iter); |
||||
|
||||
// Sets 'iter->chunk' to point to the next ('iter->chunk_num' + 1) or previous
|
||||
// ('iter->chunk_num' - 1) chunk. These functions do not loop.
|
||||
// Returns true on success, false otherwise.
|
||||
WEBP_EXTERN(int) WebPDemuxNextChunk(WebPChunkIterator* iter); |
||||
WEBP_EXTERN(int) WebPDemuxPrevChunk(WebPChunkIterator* iter); |
||||
|
||||
// Releases any memory associated with 'iter'.
|
||||
// Must be called before destroying the associated WebPDemuxer with
|
||||
// WebPDemuxDelete().
|
||||
WEBP_EXTERN(void) WebPDemuxReleaseChunkIterator(WebPChunkIterator* iter); |
||||
|
||||
//------------------------------------------------------------------------------
|
||||
|
||||
#if defined(__cplusplus) || defined(c_plusplus) |
||||
} // extern "C"
|
||||
#endif |
||||
|
||||
#endif /* WEBP_WEBP_DEMUX_H_ */ |
@@ -0,0 +1,87 @@
// Copyright 2012 Google Inc. All Rights Reserved.
//
// This code is licensed under the same terms as WebM:
// Software License Agreement: http://www.webmproject.org/license/software/
// Additional IP Rights Grant: http://www.webmproject.org/license/additional/
// -----------------------------------------------------------------------------
//
// Data-types common to the mux and demux libraries.
//
// Author: Urvang (urvang@google.com)

#ifndef WEBP_WEBP_MUX_TYPES_H_
#define WEBP_WEBP_MUX_TYPES_H_

#include <stdlib.h>  // free()
#include <string.h>  // memset()
#include "./types.h"

#if defined(__cplusplus) || defined(c_plusplus)
extern "C" {
#endif

#if !(defined(__cplusplus) || defined(c_plusplus))
typedef enum WebPFeatureFlags WebPFeatureFlags;
typedef enum WebPMuxAnimDispose WebPMuxAnimDispose;
#endif

// VP8X Feature Flags.
enum WebPFeatureFlags {
  FRAGMENTS_FLAG  = 0x00000001,
  ANIMATION_FLAG  = 0x00000002,
  XMP_FLAG        = 0x00000004,
  EXIF_FLAG       = 0x00000008,
  ALPHA_FLAG      = 0x00000010,
  ICCP_FLAG       = 0x00000020
};

// Dispose method (animation only). Indicates how the area used by the current
// frame is to be treated before rendering the next frame on the canvas.
enum WebPMuxAnimDispose {
  WEBP_MUX_DISPOSE_NONE,       // Do not dispose.
  WEBP_MUX_DISPOSE_BACKGROUND  // Dispose to background color.
};

// Data type used to describe 'raw' data, e.g., chunk data
// (ICC profile, metadata) and WebP compressed image data.
typedef struct WebPData WebPData;
struct WebPData {
  const uint8_t* bytes;
  size_t size;
};

// Initializes the contents of the 'webp_data' object with default values.
static WEBP_INLINE void WebPDataInit(WebPData* webp_data) {
  if (webp_data != NULL) {
    memset(webp_data, 0, sizeof(*webp_data));
  }
}

// Clears the contents of the 'webp_data' object by calling free(). Does not
// deallocate the object itself.
static WEBP_INLINE void WebPDataClear(WebPData* webp_data) {
  if (webp_data != NULL) {
    free((void*)webp_data->bytes);
    WebPDataInit(webp_data);
  }
}

// Allocates necessary storage for 'dst' and copies the contents of 'src'.
// Returns true on success.
static WEBP_INLINE int WebPDataCopy(const WebPData* src, WebPData* dst) {
  if (src == NULL || dst == NULL) return 0;
  WebPDataInit(dst);
  if (src->bytes != NULL && src->size != 0) {
    dst->bytes = (uint8_t*)malloc(src->size);
    if (dst->bytes == NULL) return 0;
    memcpy((void*)dst->bytes, src->bytes, src->size);
    dst->size = src->size;
  }
  return 1;
}
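// Illustrative usage of the helpers above (not part of the original header);
// 'payload' and 'n' are hypothetical application data.
//
//   WebPData src, copy;
//   WebPDataInit(&src);
//   src.bytes = payload;          // borrowed pointer, not owned by 'src'
//   src.size  = n;
//   if (WebPDataCopy(&src, &copy)) {
//     // ... use 'copy', which owns a malloc'ed duplicate of the payload ...
//     WebPDataClear(&copy);       // frees copy.bytes and re-initializes it
//   }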

#if defined(__cplusplus) || defined(c_plusplus)
}    // extern "C"
#endif

#endif  /* WEBP_WEBP_MUX_TYPES_H_ */