update libwebp up to 0.3.0

pull/737/head
AoD314 12 years ago
parent db45e04d58
commit 740941c8b8
  1. 21
      3rdparty/libwebp/dec/alpha.c
  2. 101
      3rdparty/libwebp/dec/frame.c
  3. 41
      3rdparty/libwebp/dec/idec.c
  4. 109
      3rdparty/libwebp/dec/vp8.c
  5. 14
      3rdparty/libwebp/dec/vp8i.h
  6. 65
      3rdparty/libwebp/dec/vp8l.c
  7. 36
      3rdparty/libwebp/dec/webp.c
  8. 8
      3rdparty/libwebp/dec/webpi.h
  9. 379
      3rdparty/libwebp/demux/demux.c
  10. 15
      3rdparty/libwebp/dsp/dec.c
  11. 73
      3rdparty/libwebp/dsp/dec_neon.c
  12. 20
      3rdparty/libwebp/dsp/dec_sse2.c
  13. 17
      3rdparty/libwebp/dsp/dsp.h
  14. 121
      3rdparty/libwebp/dsp/enc.c
  15. 661
      3rdparty/libwebp/dsp/enc_neon.c
  16. 318
      3rdparty/libwebp/dsp/enc_sse2.c
  17. 352
      3rdparty/libwebp/dsp/lossless.c
  18. 16
      3rdparty/libwebp/dsp/lossless.h
  19. 12
      3rdparty/libwebp/dsp/upsampling.c
  20. 292
      3rdparty/libwebp/dsp/upsampling_neon.c
  21. 20
      3rdparty/libwebp/dsp/upsampling_sse2.c
  22. 23
      3rdparty/libwebp/dsp/yuv.c
  23. 214
      3rdparty/libwebp/dsp/yuv.h
  24. 93
      3rdparty/libwebp/enc/alpha.c
  25. 243
      3rdparty/libwebp/enc/analysis.c
  26. 169
      3rdparty/libwebp/enc/backward_references.c
  27. 37
      3rdparty/libwebp/enc/backward_references.h
  28. 13
      3rdparty/libwebp/enc/config.c
  29. 4
      3rdparty/libwebp/enc/cost.c
  30. 3
      3rdparty/libwebp/enc/cost.h
  31. 576
      3rdparty/libwebp/enc/frame.c
  32. 232
      3rdparty/libwebp/enc/histogram.c
  33. 16
      3rdparty/libwebp/enc/histogram.h
  34. 154
      3rdparty/libwebp/enc/picture.c
  35. 180
      3rdparty/libwebp/enc/quant.c
  36. 25
      3rdparty/libwebp/enc/syntax.c
  37. 254
      3rdparty/libwebp/enc/token.c
  38. 119
      3rdparty/libwebp/enc/vp8enci.h
  39. 114
      3rdparty/libwebp/enc/vp8l.c
  40. 107
      3rdparty/libwebp/enc/webpenc.c
  41. 523
      3rdparty/libwebp/mux/muxedit.c
  42. 101
      3rdparty/libwebp/mux/muxi.h
  43. 231
      3rdparty/libwebp/mux/muxinternal.c
  44. 323
      3rdparty/libwebp/mux/muxread.c
  45. 107
      3rdparty/libwebp/utils/bit_reader.c
  46. 229
      3rdparty/libwebp/utils/bit_reader.h
  47. 67
      3rdparty/libwebp/utils/filters.c
  48. 9
      3rdparty/libwebp/utils/filters.h
  49. 15
      3rdparty/libwebp/utils/huffman_encode.c
  50. 9
      3rdparty/libwebp/utils/quant_levels.c
  51. 5
      3rdparty/libwebp/utils/quant_levels.h
  52. 28
      3rdparty/libwebp/utils/quant_levels_dec.c
  53. 30
      3rdparty/libwebp/utils/quant_levels_dec.h
  54. 2
      3rdparty/libwebp/utils/rescaler.c
  55. 4
      3rdparty/libwebp/utils/thread.c
  56. 12
      3rdparty/libwebp/utils/thread.h
  57. 9
      3rdparty/libwebp/utils/utils.c
  58. 39
      3rdparty/libwebp/utils/utils.h
  59. 120
      3rdparty/libwebp/webp/decode.h
  60. 212
      3rdparty/libwebp/webp/demux.h
  61. 67
      3rdparty/libwebp/webp/encode.h
  62. 26
      3rdparty/libwebp/webp/format_constants.h
  63. 531
      3rdparty/libwebp/webp/mux.h
  64. 87
      3rdparty/libwebp/webp/mux_types.h

@ -13,7 +13,7 @@
#include "./vp8i.h"
#include "./vp8li.h"
#include "../utils/filters.h"
#include "../utils/quant_levels.h"
#include "../utils/quant_levels_dec.h"
#include "../webp/format_constants.h"
#if defined(__cplusplus) || defined(c_plusplus)
@ -44,7 +44,6 @@ static int DecodeAlpha(const uint8_t* data, size_t data_size,
int width, int height, int stride, uint8_t* output) {
uint8_t* decoded_data = NULL;
const size_t decoded_size = height * width;
uint8_t* unfiltered_data = NULL;
WEBP_FILTER_TYPE filter;
int pre_processing;
int rsrv;
@ -83,29 +82,19 @@ static int DecodeAlpha(const uint8_t* data, size_t data_size,
}
if (ok) {
WebPFilterFunc unfilter_func = WebPUnfilters[filter];
WebPUnfilterFunc unfilter_func = WebPUnfilters[filter];
if (unfilter_func != NULL) {
unfiltered_data = (uint8_t*)malloc(decoded_size);
if (unfiltered_data == NULL) {
ok = 0;
goto Error;
}
// TODO(vikas): Implement on-the-fly decoding & filter mechanism to decode
// and apply filter per image-row.
unfilter_func(decoded_data, width, height, 1, width, unfiltered_data);
// Construct raw_data (height x stride) from alpha data (height x width).
CopyPlane(unfiltered_data, width, output, stride, width, height);
free(unfiltered_data);
} else {
// Construct raw_data (height x stride) from alpha data (height x width).
CopyPlane(decoded_data, width, output, stride, width, height);
unfilter_func(width, height, width, decoded_data);
}
// Construct raw_data (height x stride) from alpha data (height x width).
CopyPlane(decoded_data, width, output, stride, width, height);
if (pre_processing == ALPHA_PREPROCESSED_LEVELS) {
ok = DequantizeLevels(decoded_data, width, height);
}
}
Error:
if (method != ALPHA_NO_COMPRESSION) {
free(decoded_data);
}

@ -97,54 +97,51 @@ static void FilterRow(const VP8Decoder* const dec) {
}
//------------------------------------------------------------------------------
// Precompute the filtering strength for each segment and each i4x4/i16x16 mode.
void VP8StoreBlock(VP8Decoder* const dec) {
static void PrecomputeFilterStrengths(VP8Decoder* const dec) {
if (dec->filter_type_ > 0) {
VP8FInfo* const info = dec->f_info_ + dec->mb_x_;
const int skip = dec->mb_info_[dec->mb_x_].skip_;
int level = dec->filter_levels_[dec->segment_];
if (dec->filter_hdr_.use_lf_delta_) {
// TODO(skal): only CURRENT is handled for now.
level += dec->filter_hdr_.ref_lf_delta_[0];
if (dec->is_i4x4_) {
level += dec->filter_hdr_.mode_lf_delta_[0];
}
}
level = (level < 0) ? 0 : (level > 63) ? 63 : level;
info->f_level_ = level;
if (dec->filter_hdr_.sharpness_ > 0) {
if (dec->filter_hdr_.sharpness_ > 4) {
level >>= 2;
int s;
const VP8FilterHeader* const hdr = &dec->filter_hdr_;
for (s = 0; s < NUM_MB_SEGMENTS; ++s) {
int i4x4;
// First, compute the initial level
int base_level;
if (dec->segment_hdr_.use_segment_) {
base_level = dec->segment_hdr_.filter_strength_[s];
if (!dec->segment_hdr_.absolute_delta_) {
base_level += hdr->level_;
}
} else {
level >>= 1;
base_level = hdr->level_;
}
if (level > 9 - dec->filter_hdr_.sharpness_) {
level = 9 - dec->filter_hdr_.sharpness_;
for (i4x4 = 0; i4x4 <= 1; ++i4x4) {
VP8FInfo* const info = &dec->fstrengths_[s][i4x4];
int level = base_level;
if (hdr->use_lf_delta_) {
// TODO(skal): only CURRENT is handled for now.
level += hdr->ref_lf_delta_[0];
if (i4x4) {
level += hdr->mode_lf_delta_[0];
}
}
level = (level < 0) ? 0 : (level > 63) ? 63 : level;
info->f_level_ = level;
if (hdr->sharpness_ > 0) {
if (hdr->sharpness_ > 4) {
level >>= 2;
} else {
level >>= 1;
}
if (level > 9 - hdr->sharpness_) {
level = 9 - hdr->sharpness_;
}
}
info->f_ilevel_ = (level < 1) ? 1 : level;
info->f_inner_ = 0;
}
}
info->f_ilevel_ = (level < 1) ? 1 : level;
info->f_inner_ = (!skip || dec->is_i4x4_);
}
{
// Transfer samples to row cache
int y;
const int y_offset = dec->cache_id_ * 16 * dec->cache_y_stride_;
const int uv_offset = dec->cache_id_ * 8 * dec->cache_uv_stride_;
uint8_t* const ydst = dec->cache_y_ + dec->mb_x_ * 16 + y_offset;
uint8_t* const udst = dec->cache_u_ + dec->mb_x_ * 8 + uv_offset;
uint8_t* const vdst = dec->cache_v_ + dec->mb_x_ * 8 + uv_offset;
for (y = 0; y < 16; ++y) {
memcpy(ydst + y * dec->cache_y_stride_,
dec->yuv_b_ + Y_OFF + y * BPS, 16);
}
for (y = 0; y < 8; ++y) {
memcpy(udst + y * dec->cache_uv_stride_,
dec->yuv_b_ + U_OFF + y * BPS, 8);
memcpy(vdst + y * dec->cache_uv_stride_,
dec->yuv_b_ + V_OFF + y * BPS, 8);
}
}
}
@ -339,6 +336,7 @@ VP8StatusCode VP8EnterCritical(VP8Decoder* const dec, VP8Io* const io) {
dec->br_mb_y_ = dec->mb_h_;
}
}
PrecomputeFilterStrengths(dec);
return VP8_STATUS_OK;
}
@ -496,6 +494,7 @@ static int AllocateMemory(VP8Decoder* const dec) {
// alpha plane
dec->alpha_plane_ = alpha_size ? (uint8_t*)mem : NULL;
mem += alpha_size;
assert(mem <= (uint8_t*)dec->mem_ + dec->mem_size_);
// note: left-info is initialized once for all.
memset(dec->mb_info_ - 1, 0, mb_info_size);
@ -551,6 +550,7 @@ static WEBP_INLINE void Copy32b(uint8_t* dst, uint8_t* src) {
}
void VP8ReconstructBlock(VP8Decoder* const dec) {
int j;
uint8_t* const y_dst = dec->yuv_b_ + Y_OFF;
uint8_t* const u_dst = dec->yuv_b_ + U_OFF;
uint8_t* const v_dst = dec->yuv_b_ + V_OFF;
@ -558,7 +558,6 @@ void VP8ReconstructBlock(VP8Decoder* const dec) {
// Rotate in the left samples from previously decoded block. We move four
// pixels at a time for alignment reason, and because of in-loop filter.
if (dec->mb_x_ > 0) {
int j;
for (j = -1; j < 16; ++j) {
Copy32b(&y_dst[j * BPS - 4], &y_dst[j * BPS + 12]);
}
@ -567,7 +566,6 @@ void VP8ReconstructBlock(VP8Decoder* const dec) {
Copy32b(&v_dst[j * BPS - 4], &v_dst[j * BPS + 4]);
}
} else {
int j;
for (j = 0; j < 16; ++j) {
y_dst[j * BPS - 1] = 129;
}
@ -670,6 +668,21 @@ void VP8ReconstructBlock(VP8Decoder* const dec) {
}
}
}
// Transfer reconstructed samples from yuv_b_ cache to final destination.
{
const int y_offset = dec->cache_id_ * 16 * dec->cache_y_stride_;
const int uv_offset = dec->cache_id_ * 8 * dec->cache_uv_stride_;
uint8_t* const y_out = dec->cache_y_ + dec->mb_x_ * 16 + y_offset;
uint8_t* const u_out = dec->cache_u_ + dec->mb_x_ * 8 + uv_offset;
uint8_t* const v_out = dec->cache_v_ + dec->mb_x_ * 8 + uv_offset;
for (j = 0; j < 16; ++j) {
memcpy(y_out + j * dec->cache_y_stride_, y_dst + j * BPS, 16);
}
for (j = 0; j < 8; ++j) {
memcpy(u_out + j * dec->cache_uv_stride_, u_dst + j * BPS, 8);
memcpy(v_out + j * dec->cache_uv_stride_, v_dst + j * BPS, 8);
}
}
}
//------------------------------------------------------------------------------

@ -425,9 +425,8 @@ static VP8StatusCode DecodeRemaining(WebPIDecoder* const idec) {
}
return VP8_STATUS_SUSPENDED;
}
// Reconstruct and emit samples.
VP8ReconstructBlock(dec);
// Store data and save block's filtering params
VP8StoreBlock(dec);
// Release buffer only if there is only one partition
if (dec->num_parts_ == 1) {
@ -596,12 +595,22 @@ void WebPIDelete(WebPIDecoder* idec) {
WebPIDecoder* WebPINewRGB(WEBP_CSP_MODE mode, uint8_t* output_buffer,
size_t output_buffer_size, int output_stride) {
const int is_external_memory = (output_buffer != NULL);
WebPIDecoder* idec;
if (mode >= MODE_YUV) return NULL;
if (!is_external_memory) { // Overwrite parameters to sane values.
output_buffer_size = 0;
output_stride = 0;
} else { // A buffer was passed. Validate the other params.
if (output_stride == 0 || output_buffer_size == 0) {
return NULL; // invalid parameter.
}
}
idec = WebPINewDecoder(NULL);
if (idec == NULL) return NULL;
idec->output_.colorspace = mode;
idec->output_.is_external_memory = 1;
idec->output_.is_external_memory = is_external_memory;
idec->output_.u.RGBA.rgba = output_buffer;
idec->output_.u.RGBA.stride = output_stride;
idec->output_.u.RGBA.size = output_buffer_size;
@ -612,10 +621,30 @@ WebPIDecoder* WebPINewYUVA(uint8_t* luma, size_t luma_size, int luma_stride,
uint8_t* u, size_t u_size, int u_stride,
uint8_t* v, size_t v_size, int v_stride,
uint8_t* a, size_t a_size, int a_stride) {
WebPIDecoder* const idec = WebPINewDecoder(NULL);
const int is_external_memory = (luma != NULL);
WebPIDecoder* idec;
WEBP_CSP_MODE colorspace;
if (!is_external_memory) { // Overwrite parameters to sane values.
luma_size = u_size = v_size = a_size = 0;
luma_stride = u_stride = v_stride = a_stride = 0;
u = v = a = NULL;
colorspace = MODE_YUVA;
} else { // A luma buffer was passed. Validate the other parameters.
if (u == NULL || v == NULL) return NULL;
if (luma_size == 0 || u_size == 0 || v_size == 0) return NULL;
if (luma_stride == 0 || u_stride == 0 || v_stride == 0) return NULL;
if (a != NULL) {
if (a_size == 0 || a_stride == 0) return NULL;
}
colorspace = (a == NULL) ? MODE_YUV : MODE_YUVA;
}
idec = WebPINewDecoder(NULL);
if (idec == NULL) return NULL;
idec->output_.colorspace = (a == NULL) ? MODE_YUV : MODE_YUVA;
idec->output_.is_external_memory = 1;
idec->output_.colorspace = colorspace;
idec->output_.is_external_memory = is_external_memory;
idec->output_.u.YUVA.y = luma;
idec->output_.u.YUVA.y_stride = luma_stride;
idec->output_.u.YUVA.y_size = luma_size;

@ -236,20 +236,6 @@ static int ParseFilterHeader(VP8BitReader* br, VP8Decoder* const dec) {
}
}
dec->filter_type_ = (hdr->level_ == 0) ? 0 : hdr->simple_ ? 1 : 2;
if (dec->filter_type_ > 0) { // precompute filter levels per segment
if (dec->segment_hdr_.use_segment_) {
int s;
for (s = 0; s < NUM_MB_SEGMENTS; ++s) {
int strength = dec->segment_hdr_.filter_strength_[s];
if (!dec->segment_hdr_.absolute_delta_) {
strength += hdr->level_;
}
dec->filter_levels_[s] = strength;
}
} else {
dec->filter_levels_[0] = hdr->level_;
}
}
return !br->eof_;
}
@ -458,7 +444,7 @@ int VP8GetHeaders(VP8Decoder* const dec, VP8Io* const io) {
//------------------------------------------------------------------------------
// Residual decoding (Paragraph 13.2 / 13.3)
static const uint8_t kBands[16 + 1] = {
static const int kBands[16 + 1] = {
0, 1, 2, 3, 6, 4, 5, 6, 6, 6, 6, 6, 6, 6, 6, 7,
0 // extra entry as sentinel
};
@ -474,6 +460,39 @@ static const uint8_t kZigzag[16] = {
};
typedef const uint8_t (*ProbaArray)[NUM_CTX][NUM_PROBAS]; // for const-casting
typedef const uint8_t (*ProbaCtxArray)[NUM_PROBAS];
// Decodes a coefficient token value of 2 or more from 'br', walking the token
// tree described in RFC 6386, section 13.2:
//   http://tools.ietf.org/html/rfc6386#section-13.2
// 'p' is the probability array for the current context; entries p[3] through
// p[10] are consulted here. The sign is not read by this function.
static int GetLargeValue(VP8BitReader* const br, const uint8_t* const p) {
  if (!VP8GetBit(br, p[3])) {
    // Short branch: either the literal 2, or 3 plus one more bit.
    if (!VP8GetBit(br, p[4])) {
      return 2;
    }
    return 3 + VP8GetBit(br, p[5]);
  }

  if (!VP8GetBit(br, p[6])) {
    int value;
    if (!VP8GetBit(br, p[7])) {
      // Base 5 plus one extra bit read with a fixed probability.
      return 5 + VP8GetBit(br, 159);
    }
    // Base 7 plus two extra bits (most significant first), each read with its
    // own fixed probability. The two reads must stay in this order.
    value = 7 + 2 * VP8GetBit(br, 165);
    value += VP8GetBit(br, 145);
    return value;
  }

  {
    // Two bits select which kCat3456[] table to use; the second bit's
    // probability (p[9] or p[10]) depends on the first bit.
    const int hi = VP8GetBit(br, p[8]);
    const int lo = VP8GetBit(br, p[9 + hi]);
    const int cat = 2 * hi + lo;
    const uint8_t* proba = kCat3456[cat];
    int extra = 0;
    // The table is zero-terminated: each non-zero entry is the probability
    // used to read one more bit, accumulated most significant bit first.
    while (*proba) {
      extra = 2 * extra + VP8GetBit(br, *proba);
      ++proba;
    }
    return extra + 3 + (8 << cat);
  }
}
// Returns the position of the last non-zero coeff plus one
// (and 0 if there's no coeff at all)
@ -484,54 +503,26 @@ static int GetCoeffs(VP8BitReader* const br, ProbaArray prob,
if (!VP8GetBit(br, p[0])) { // first EOB is more a 'CBP' bit.
return 0;
}
while (1) {
++n;
for (; n < 16; ++n) {
const ProbaCtxArray p_ctx = prob[kBands[n + 1]];
if (!VP8GetBit(br, p[1])) {
p = prob[kBands[n]][0];
p = p_ctx[0];
} else { // non zero coeff
int v, j;
int v;
if (!VP8GetBit(br, p[2])) {
p = prob[kBands[n]][1];
v = 1;
p = p_ctx[1];
} else {
if (!VP8GetBit(br, p[3])) {
if (!VP8GetBit(br, p[4])) {
v = 2;
} else {
v = 3 + VP8GetBit(br, p[5]);
}
} else {
if (!VP8GetBit(br, p[6])) {
if (!VP8GetBit(br, p[7])) {
v = 5 + VP8GetBit(br, 159);
} else {
v = 7 + 2 * VP8GetBit(br, 165);
v += VP8GetBit(br, 145);
}
} else {
const uint8_t* tab;
const int bit1 = VP8GetBit(br, p[8]);
const int bit0 = VP8GetBit(br, p[9 + bit1]);
const int cat = 2 * bit1 + bit0;
v = 0;
for (tab = kCat3456[cat]; *tab; ++tab) {
v += v + VP8GetBit(br, *tab);
}
v += 3 + (8 << cat);
}
}
p = prob[kBands[n]][2];
v = GetLargeValue(br, p);
p = p_ctx[2];
}
j = kZigzag[n - 1];
out[j] = VP8GetSigned(br, v) * dq[j > 0];
if (n == 16 || !VP8GetBit(br, p[0])) { // EOB
return n;
out[kZigzag[n]] = VP8GetSigned(br, v) * dq[n > 0];
if (n < 15 && !VP8GetBit(br, p[0])) { // EOB
return n + 1;
}
}
if (n == 16) {
return 16;
}
}
return 16;
}
// Alias-safe way of converting 4bytes to 32bits.
@ -670,6 +661,12 @@ int VP8DecodeMB(VP8Decoder* const dec, VP8BitReader* const token_br) {
dec->non_zero_ac_ = 0;
}
if (dec->filter_type_ > 0) { // store filter info
VP8FInfo* const finfo = dec->f_info_ + dec->mb_x_;
*finfo = dec->fstrengths_[dec->segment_][dec->is_i4x4_];
finfo->f_inner_ = (!info->skip_ || dec->is_i4x4_);
}
return (!token_br->eof_);
}
@ -693,10 +690,8 @@ static int ParseFrame(VP8Decoder* const dec, VP8Io* io) {
return VP8SetError(dec, VP8_STATUS_NOT_ENOUGH_DATA,
"Premature end-of-file encountered.");
}
// Reconstruct and emit samples.
VP8ReconstructBlock(dec);
// Store data and save block's filtering params
VP8StoreBlock(dec);
}
if (!VP8ProcessRow(dec, io)) {
return VP8SetError(dec, VP8_STATUS_USER_ABORT, "Output aborted.");

@ -27,8 +27,8 @@ extern "C" {
// version numbers
#define DEC_MAJ_VERSION 0
#define DEC_MIN_VERSION 2
#define DEC_REV_VERSION 1
#define DEC_MIN_VERSION 3
#define DEC_REV_VERSION 0
#define ONLY_KEYFRAME_CODE // to remove any code related to P-Frames
@ -157,7 +157,7 @@ typedef struct { // filter specs
} VP8FInfo;
typedef struct { // used for syntax-parsing
unsigned int nz_; // non-zero AC/DC coeffs
unsigned int nz_:24; // non-zero AC/DC coeffs (24bit)
unsigned int dc_nz_:1; // non-zero DC coeffs
unsigned int skip_:1; // block type
} VP8MB;
@ -269,9 +269,9 @@ struct VP8Decoder {
uint32_t non_zero_ac_;
// Filtering side-info
int filter_type_; // 0=off, 1=simple, 2=complex
int filter_row_; // per-row flag
uint8_t filter_levels_[NUM_MB_SEGMENTS]; // precalculated per-segment
int filter_type_; // 0=off, 1=simple, 2=complex
int filter_row_; // per-row flag
VP8FInfo fstrengths_[NUM_MB_SEGMENTS][2]; // precalculated per-segment/type
// extensions
const uint8_t* alpha_data_; // compressed alpha data (if present)
@ -312,8 +312,6 @@ VP8StatusCode VP8EnterCritical(VP8Decoder* const dec, VP8Io* const io);
int VP8ExitCritical(VP8Decoder* const dec, VP8Io* const io);
// Process the last decoded row (filtering + output)
int VP8ProcessRow(VP8Decoder* const dec, VP8Io* const io);
// Store a block, along with filtering params
void VP8StoreBlock(VP8Decoder* const dec);
// To be called at the start of a new scanline, to initialize predictors.
void VP8InitScanline(VP8Decoder* const dec);
// Decode one macroblock. Returns false if there is not enough data.

@ -58,18 +58,18 @@ static const uint8_t kCodeLengthCodeOrder[NUM_CODE_LENGTH_CODES] = {
#define CODE_TO_PLANE_CODES 120
static const uint8_t code_to_plane_lut[CODE_TO_PLANE_CODES] = {
0x18, 0x07, 0x17, 0x19, 0x28, 0x06, 0x27, 0x29, 0x16, 0x1a,
0x26, 0x2a, 0x38, 0x05, 0x37, 0x39, 0x15, 0x1b, 0x36, 0x3a,
0x25, 0x2b, 0x48, 0x04, 0x47, 0x49, 0x14, 0x1c, 0x35, 0x3b,
0x46, 0x4a, 0x24, 0x2c, 0x58, 0x45, 0x4b, 0x34, 0x3c, 0x03,
0x57, 0x59, 0x13, 0x1d, 0x56, 0x5a, 0x23, 0x2d, 0x44, 0x4c,
0x55, 0x5b, 0x33, 0x3d, 0x68, 0x02, 0x67, 0x69, 0x12, 0x1e,
0x66, 0x6a, 0x22, 0x2e, 0x54, 0x5c, 0x43, 0x4d, 0x65, 0x6b,
0x32, 0x3e, 0x78, 0x01, 0x77, 0x79, 0x53, 0x5d, 0x11, 0x1f,
0x64, 0x6c, 0x42, 0x4e, 0x76, 0x7a, 0x21, 0x2f, 0x75, 0x7b,
0x31, 0x3f, 0x63, 0x6d, 0x52, 0x5e, 0x00, 0x74, 0x7c, 0x41,
0x4f, 0x10, 0x20, 0x62, 0x6e, 0x30, 0x73, 0x7d, 0x51, 0x5f,
0x40, 0x72, 0x7e, 0x61, 0x6f, 0x50, 0x71, 0x7f, 0x60, 0x70
0x18, 0x07, 0x17, 0x19, 0x28, 0x06, 0x27, 0x29, 0x16, 0x1a,
0x26, 0x2a, 0x38, 0x05, 0x37, 0x39, 0x15, 0x1b, 0x36, 0x3a,
0x25, 0x2b, 0x48, 0x04, 0x47, 0x49, 0x14, 0x1c, 0x35, 0x3b,
0x46, 0x4a, 0x24, 0x2c, 0x58, 0x45, 0x4b, 0x34, 0x3c, 0x03,
0x57, 0x59, 0x13, 0x1d, 0x56, 0x5a, 0x23, 0x2d, 0x44, 0x4c,
0x55, 0x5b, 0x33, 0x3d, 0x68, 0x02, 0x67, 0x69, 0x12, 0x1e,
0x66, 0x6a, 0x22, 0x2e, 0x54, 0x5c, 0x43, 0x4d, 0x65, 0x6b,
0x32, 0x3e, 0x78, 0x01, 0x77, 0x79, 0x53, 0x5d, 0x11, 0x1f,
0x64, 0x6c, 0x42, 0x4e, 0x76, 0x7a, 0x21, 0x2f, 0x75, 0x7b,
0x31, 0x3f, 0x63, 0x6d, 0x52, 0x5e, 0x00, 0x74, 0x7c, 0x41,
0x4f, 0x10, 0x20, 0x62, 0x6e, 0x30, 0x73, 0x7d, 0x51, 0x5f,
0x40, 0x72, 0x7e, 0x61, 0x6f, 0x50, 0x71, 0x7f, 0x60, 0x70
};
static int DecodeImageStream(int xsize, int ysize,
@ -149,31 +149,22 @@ static WEBP_INLINE int PlaneCodeToDistance(int xsize, int plane_code) {
//------------------------------------------------------------------------------
// Decodes the next Huffman code from bit-stream.
// FillBitWindow(br) needs to be called at minimum every second call
// to ReadSymbolUnsafe.
static int ReadSymbolUnsafe(const HuffmanTree* tree, VP8LBitReader* const br) {
// to ReadSymbol, in order to pre-fetch enough bits.
static WEBP_INLINE int ReadSymbol(const HuffmanTree* tree,
VP8LBitReader* const br) {
const HuffmanTreeNode* node = tree->root_;
int num_bits = 0;
uint32_t bits = VP8LPrefetchBits(br);
assert(node != NULL);
while (!HuffmanTreeNodeIsLeaf(node)) {
node = HuffmanTreeNextNode(node, VP8LReadOneBitUnsafe(br));
node = HuffmanTreeNextNode(node, bits & 1);
bits >>= 1;
++num_bits;
}
VP8LDiscardBits(br, num_bits);
return node->symbol_;
}
static WEBP_INLINE int ReadSymbol(const HuffmanTree* tree,
VP8LBitReader* const br) {
const int read_safe = (br->pos_ + 8 > br->len_);
if (!read_safe) {
return ReadSymbolUnsafe(tree, br);
} else {
const HuffmanTreeNode* node = tree->root_;
assert(node != NULL);
while (!HuffmanTreeNodeIsLeaf(node)) {
node = HuffmanTreeNextNode(node, VP8LReadOneBit(br));
}
return node->symbol_;
}
}
static int ReadHuffmanCodeLengths(
VP8LDecoder* const dec, const int* const code_length_code_lengths,
int num_symbols, int* const code_lengths) {
@ -327,10 +318,10 @@ static int ReadHuffmanCodes(VP8LDecoder* const dec, int xsize, int ysize,
hdr->huffman_subsample_bits_ = huffman_precision;
for (i = 0; i < huffman_pixs; ++i) {
// The huffman data is stored in red and green bytes.
const int index = (huffman_image[i] >> 8) & 0xffff;
huffman_image[i] = index;
if (index >= num_htree_groups) {
num_htree_groups = index + 1;
const int group = (huffman_image[i] >> 8) & 0xffff;
huffman_image[i] = group;
if (group >= num_htree_groups) {
num_htree_groups = group + 1;
}
}
}
@ -1146,9 +1137,9 @@ int VP8LDecodeHeader(VP8LDecoder* const dec, VP8Io* const io) {
return 1;
Error:
VP8LClear(dec);
assert(dec->status_ != VP8_STATUS_OK);
return 0;
VP8LClear(dec);
assert(dec->status_ != VP8_STATUS_OK);
return 0;
}
int VP8LDecodeImage(VP8LDecoder* const dec) {

@ -14,7 +14,7 @@
#include "./vp8i.h"
#include "./vp8li.h"
#include "./webpi.h"
#include "../webp/format_constants.h"
#include "../webp/mux_types.h" // ALPHA_FLAG
#if defined(__cplusplus) || defined(c_plusplus)
extern "C" {
@ -40,8 +40,8 @@ extern "C" {
// 20..23 VP8X flags bit-map corresponding to the chunk-types present.
// 24..26 Width of the Canvas Image.
// 27..29 Height of the Canvas Image.
// There can be extra chunks after the "VP8X" chunk (ICCP, TILE, FRM, VP8,
// META ...)
// There can be extra chunks after the "VP8X" chunk (ICCP, FRGM, ANMF, VP8,
// VP8L, XMP, EXIF ...)
// All sizes are in little-endian order.
// Note: chunk data size must be padded to multiple of 2 when written.
@ -276,6 +276,7 @@ static VP8StatusCode ParseHeadersInternal(const uint8_t* data,
int* const width,
int* const height,
int* const has_alpha,
int* const has_animation,
WebPHeaderStructure* const headers) {
int found_riff = 0;
int found_vp8x = 0;
@ -308,7 +309,8 @@ static VP8StatusCode ParseHeadersInternal(const uint8_t* data,
// necessary to send VP8X chunk to the decoder.
return VP8_STATUS_BITSTREAM_ERROR;
}
if (has_alpha != NULL) *has_alpha = !!(flags & ALPHA_FLAG_BIT);
if (has_alpha != NULL) *has_alpha = !!(flags & ALPHA_FLAG);
if (has_animation != NULL) *has_animation = !!(flags & ANIMATION_FLAG);
if (found_vp8x && headers == NULL) {
return VP8_STATUS_OK; // Return features from VP8X header.
}
@ -370,10 +372,19 @@ static VP8StatusCode ParseHeadersInternal(const uint8_t* data,
}
VP8StatusCode WebPParseHeaders(WebPHeaderStructure* const headers) {
VP8StatusCode status;
int has_animation = 0;
assert(headers != NULL);
// fill out headers, ignore width/height/has_alpha.
return ParseHeadersInternal(headers->data, headers->data_size,
NULL, NULL, NULL, headers);
status = ParseHeadersInternal(headers->data, headers->data_size,
NULL, NULL, NULL, &has_animation, headers);
if (status == VP8_STATUS_OK || status == VP8_STATUS_NOT_ENOUGH_DATA) {
// TODO(jzern): full support of animation frames will require API additions.
if (has_animation) {
status = VP8_STATUS_UNSUPPORTED_FEATURE;
}
}
return status;
}
//------------------------------------------------------------------------------
@ -625,10 +636,11 @@ static VP8StatusCode GetFeatures(const uint8_t* const data, size_t data_size,
}
DefaultFeatures(features);
// Only parse enough of the data to retrieve width/height/has_alpha.
// Only parse enough of the data to retrieve the features.
return ParseHeadersInternal(data, data_size,
&features->width, &features->height,
&features->has_alpha, NULL);
&features->has_alpha, &features->has_animation,
NULL);
}
//------------------------------------------------------------------------------
@ -672,19 +684,13 @@ int WebPInitDecoderConfigInternal(WebPDecoderConfig* config,
VP8StatusCode WebPGetFeaturesInternal(const uint8_t* data, size_t data_size,
WebPBitstreamFeatures* features,
int version) {
VP8StatusCode status;
if (WEBP_ABI_IS_INCOMPATIBLE(version, WEBP_DECODER_ABI_VERSION)) {
return VP8_STATUS_INVALID_PARAM; // version mismatch
}
if (features == NULL) {
return VP8_STATUS_INVALID_PARAM;
}
status = GetFeatures(data, data_size, features);
if (status == VP8_STATUS_NOT_ENOUGH_DATA) {
return VP8_STATUS_BITSTREAM_ERROR; // Not-enough-data treated as error.
}
return status;
return GetFeatures(data, data_size, features);
}
VP8StatusCode WebPDecode(const uint8_t* data, size_t data_size,

@ -61,10 +61,10 @@ typedef struct {
} WebPHeaderStructure;
// Skips over all valid chunks prior to the first VP8/VP8L frame header.
// Returns VP8_STATUS_OK on success,
// VP8_STATUS_BITSTREAM_ERROR if an invalid header/chunk is found, and
// VP8_STATUS_NOT_ENOUGH_DATA if case of insufficient data.
// In 'headers', compressed_size, offset, alpha_data, alpha_size and lossless
// Returns: VP8_STATUS_OK, VP8_STATUS_BITSTREAM_ERROR (invalid header/chunk),
// VP8_STATUS_NOT_ENOUGH_DATA (partial input) or VP8_STATUS_UNSUPPORTED_FEATURE
// in the case of non-decodable features (animation for instance).
// In 'headers', compressed_size, offset, alpha_data, alpha_size, and lossless
// fields are updated appropriately upon success.
VP8StatusCode WebPParseHeaders(WebPHeaderStructure* const headers);

@ -8,19 +8,26 @@
// WebP container demux.
//
#include "../webp/mux.h"
#ifdef HAVE_CONFIG_H
#include "config.h"
#endif
#include <assert.h>
#include <stdlib.h>
#include <string.h>
#include "../webp/decode.h" // WebPGetInfo
#include "../utils/utils.h"
#include "../webp/decode.h" // WebPGetFeatures
#include "../webp/demux.h"
#include "../webp/format_constants.h"
#if defined(__cplusplus) || defined(c_plusplus)
extern "C" {
#endif
#define MKFOURCC(a, b, c, d) ((uint32_t)(a) | (b) << 8 | (c) << 16 | (d) << 24)
#define DMUX_MAJ_VERSION 0
#define DMUX_MIN_VERSION 1
#define DMUX_REV_VERSION 0
typedef struct {
size_t start_; // start location of the data
@ -39,8 +46,9 @@ typedef struct Frame {
int x_offset_, y_offset_;
int width_, height_;
int duration_;
int is_tile_; // this is an image fragment from a 'TILE'.
int frame_num_; // the referent frame number for use in assembling tiles.
WebPMuxAnimDispose dispose_method_;
int is_fragment_; // this is a frame fragment (and not a full frame).
int frame_num_; // the referent frame number for use in assembling fragments.
int complete_; // img_components_ contains a full image.
ChunkData img_components_[2]; // 0=VP8{,L} 1=ALPH
struct Frame* next_;
@ -58,8 +66,10 @@ struct WebPDemuxer {
uint32_t feature_flags_;
int canvas_width_, canvas_height_;
int loop_count_;
uint32_t bgcolor_;
int num_frames_;
Frame* frames_;
Frame** frames_tail_;
Chunk* chunks_; // non-image chunks
};
@ -87,6 +97,12 @@ static const ChunkParser kMasterChunks[] = {
{ { '0', '0', '0', '0' }, NULL, NULL },
};
//------------------------------------------------------------------------------
// Returns the demux version packed into one integer: the major number shifted
// left by 16, the minor number shifted left by 8, OR-ed with the revision.
int WebPGetDemuxVersion(void) {
  const int major_part = DMUX_MAJ_VERSION << 16;
  const int minor_part = DMUX_MIN_VERSION << 8;
  return major_part | minor_part | DMUX_REV_VERSION;
}
// -----------------------------------------------------------------------------
// MemBuffer
@ -127,43 +143,30 @@ static WEBP_INLINE const uint8_t* GetBuffer(MemBuffer* const mem) {
return mem->buf_ + mem->start_;
}
static WEBP_INLINE uint8_t GetByte(MemBuffer* const mem) {
// Returns the byte located at offset 'start_' within the buffer of 'mem', then
// calls Skip(mem, 1) to consume it. No bounds check is performed here.
static WEBP_INLINE uint8_t ReadByte(MemBuffer* const mem) {
  const uint8_t* const cursor = mem->buf_ + mem->start_;
  const uint8_t value = *cursor;
  Skip(mem, 1);
  return value;
}
// Read 16, 24 or 32 bits stored in little-endian order.
static WEBP_INLINE int ReadLE16s(const uint8_t* const data) {
return (int)(data[0] << 0) | (data[1] << 8);
}
static WEBP_INLINE int ReadLE24s(const uint8_t* const data) {
return ReadLE16s(data) | (data[2] << 16);
}
static WEBP_INLINE uint32_t ReadLE32(const uint8_t* const data) {
return (uint32_t)ReadLE24s(data) | (data[3] << 24);
}
// In addition to reading, skip the read bytes.
static WEBP_INLINE int GetLE16s(MemBuffer* const mem) {
static WEBP_INLINE int ReadLE16s(MemBuffer* const mem) {
const uint8_t* const data = mem->buf_ + mem->start_;
const int val = ReadLE16s(data);
const int val = GetLE16(data);
Skip(mem, 2);
return val;
}
static WEBP_INLINE int GetLE24s(MemBuffer* const mem) {
static WEBP_INLINE int ReadLE24s(MemBuffer* const mem) {
const uint8_t* const data = mem->buf_ + mem->start_;
const int val = ReadLE24s(data);
const int val = GetLE24(data);
Skip(mem, 3);
return val;
}
static WEBP_INLINE uint32_t GetLE32(MemBuffer* const mem) {
static WEBP_INLINE uint32_t ReadLE32(MemBuffer* const mem) {
const uint8_t* const data = mem->buf_ + mem->start_;
const uint32_t val = ReadLE32(data);
const uint32_t val = GetLE32(data);
Skip(mem, 4);
return val;
}
@ -181,32 +184,34 @@ static void AddChunk(WebPDemuxer* const dmux, Chunk* const chunk) {
// Add a frame to the end of the list, ensuring the last frame is complete.
// Returns true on success, false otherwise.
static int AddFrame(WebPDemuxer* const dmux, Frame* const frame) {
const Frame* last_frame = NULL;
Frame** f = &dmux->frames_;
while (*f != NULL) {
last_frame = *f;
f = &(*f)->next_;
}
const Frame* const last_frame = *dmux->frames_tail_;
if (last_frame != NULL && !last_frame->complete_) return 0;
*f = frame;
*dmux->frames_tail_ = frame;
frame->next_ = NULL;
dmux->frames_tail_ = &frame->next_;
return 1;
}
// Store image bearing chunks to 'frame'.
static ParseStatus StoreFrame(int frame_num, MemBuffer* const mem,
Frame* const frame) {
// If 'has_vp8l_alpha' is not NULL, it will be set to true if the frame is a
// lossless image with alpha.
static ParseStatus StoreFrame(int frame_num, uint32_t min_size,
MemBuffer* const mem, Frame* const frame,
int* const has_vp8l_alpha) {
int alpha_chunks = 0;
int image_chunks = 0;
int done = (MemDataSize(mem) < CHUNK_HEADER_SIZE);
int done = (MemDataSize(mem) < min_size);
ParseStatus status = PARSE_OK;
if (has_vp8l_alpha != NULL) *has_vp8l_alpha = 0; // Default.
if (done) return PARSE_NEED_MORE_DATA;
do {
const size_t chunk_start_offset = mem->start_;
const uint32_t fourcc = GetLE32(mem);
const uint32_t payload_size = GetLE32(mem);
const uint32_t fourcc = ReadLE32(mem);
const uint32_t payload_size = ReadLE32(mem);
const uint32_t payload_size_padded = payload_size + (payload_size & 1);
const size_t payload_available = (payload_size_padded > MemDataSize(mem))
? MemDataSize(mem) : payload_size_padded;
@ -228,23 +233,30 @@ static ParseStatus StoreFrame(int frame_num, MemBuffer* const mem,
goto Done;
}
break;
case MKFOURCC('V', 'P', '8', ' '):
case MKFOURCC('V', 'P', '8', 'L'):
if (alpha_chunks > 0) return PARSE_ERROR; // VP8L has its own alpha
// fall through
case MKFOURCC('V', 'P', '8', ' '):
if (image_chunks == 0) {
int width = 0, height = 0;
// Extract the bitstream features, tolerating failures when the data
// is incomplete.
WebPBitstreamFeatures features;
const VP8StatusCode vp8_status =
WebPGetFeatures(mem->buf_ + chunk_start_offset, chunk_size,
&features);
if (status == PARSE_NEED_MORE_DATA &&
vp8_status == VP8_STATUS_NOT_ENOUGH_DATA) {
return PARSE_NEED_MORE_DATA;
} else if (vp8_status != VP8_STATUS_OK) {
// We have enough data, and yet WebPGetFeatures() failed.
return PARSE_ERROR;
}
++image_chunks;
frame->img_components_[0].offset_ = chunk_start_offset;
frame->img_components_[0].size_ = chunk_size;
// Extract the width and height from the bitstream, tolerating
// failures when the data is incomplete.
if (!WebPGetInfo(mem->buf_ + frame->img_components_[0].offset_,
frame->img_components_[0].size_, &width, &height) &&
status != PARSE_NEED_MORE_DATA) {
return PARSE_ERROR;
}
frame->width_ = width;
frame->height_ = height;
frame->width_ = features.width;
frame->height_ = features.height;
if (has_vp8l_alpha != NULL) *has_vp8l_alpha = features.has_alpha;
frame->frame_num_ = frame_num;
frame->complete_ = (status == PARSE_OK);
Skip(mem, payload_available);
@ -275,42 +287,43 @@ static ParseStatus StoreFrame(int frame_num, MemBuffer* const mem,
// Returns PARSE_OK on success with *frame pointing to the new Frame.
// Returns PARSE_NEED_MORE_DATA with insufficient data, PARSE_ERROR otherwise.
static ParseStatus NewFrame(const MemBuffer* const mem,
uint32_t min_size, uint32_t expected_size,
uint32_t actual_size, Frame** frame) {
uint32_t min_size, uint32_t actual_size,
Frame** frame) {
if (SizeIsInvalid(mem, min_size)) return PARSE_ERROR;
if (actual_size < expected_size) return PARSE_ERROR;
if (actual_size < min_size) return PARSE_ERROR;
if (MemDataSize(mem) < min_size) return PARSE_NEED_MORE_DATA;
*frame = (Frame*)calloc(1, sizeof(**frame));
return (*frame == NULL) ? PARSE_ERROR : PARSE_OK;
}
// Parse a 'FRM ' chunk and any image bearing chunks that immediately follow.
// Parse an 'ANMF' chunk and any image bearing chunks that immediately follow.
// 'frame_chunk_size' is the previously validated, padded chunk size.
static ParseStatus ParseFrame(
static ParseStatus ParseAnimationFrame(
WebPDemuxer* const dmux, uint32_t frame_chunk_size) {
const int has_frames = !!(dmux->feature_flags_ & ANIMATION_FLAG);
const uint32_t min_size = frame_chunk_size + CHUNK_HEADER_SIZE;
const uint32_t anmf_payload_size = frame_chunk_size - ANMF_CHUNK_SIZE;
int added_frame = 0;
MemBuffer* const mem = &dmux->mem_;
Frame* frame;
ParseStatus status =
NewFrame(mem, min_size, FRAME_CHUNK_SIZE, frame_chunk_size, &frame);
NewFrame(mem, ANMF_CHUNK_SIZE, frame_chunk_size, &frame);
if (status != PARSE_OK) return status;
frame->x_offset_ = 2 * GetLE24s(mem);
frame->y_offset_ = 2 * GetLE24s(mem);
frame->width_ = 1 + GetLE24s(mem);
frame->height_ = 1 + GetLE24s(mem);
frame->duration_ = 1 + GetLE24s(mem);
Skip(mem, frame_chunk_size - FRAME_CHUNK_SIZE); // skip any trailing data.
frame->x_offset_ = 2 * ReadLE24s(mem);
frame->y_offset_ = 2 * ReadLE24s(mem);
frame->width_ = 1 + ReadLE24s(mem);
frame->height_ = 1 + ReadLE24s(mem);
frame->duration_ = ReadLE24s(mem);
frame->dispose_method_ = (WebPMuxAnimDispose)(ReadByte(mem) & 1);
if (frame->width_ * (uint64_t)frame->height_ >= MAX_IMAGE_AREA) {
return PARSE_ERROR;
}
// Store a (potentially partial) frame only if the animation flag is set
// and there is some data in 'frame'.
status = StoreFrame(dmux->num_frames_ + 1, mem, frame);
// Store a frame only if the animation flag is set and there is some data
// available for this frame.
status = StoreFrame(dmux->num_frames_ + 1, anmf_payload_size, mem, frame,
NULL);
if (status != PARSE_ERROR && has_frames && frame->frame_num_ > 0) {
added_frame = AddFrame(dmux, frame);
if (added_frame) {
@ -324,38 +337,43 @@ static ParseStatus ParseFrame(
return status;
}
// Parse a 'TILE' chunk and any image bearing chunks that immediately follow.
// 'tile_chunk_size' is the previously validated, padded chunk size.
static ParseStatus ParseTile(WebPDemuxer* const dmux,
uint32_t tile_chunk_size) {
const int has_tiles = !!(dmux->feature_flags_ & TILE_FLAG);
const uint32_t min_size = tile_chunk_size + CHUNK_HEADER_SIZE;
int added_tile = 0;
#ifdef WEBP_EXPERIMENTAL_FEATURES
// Parse a 'FRGM' chunk and any image bearing chunks that immediately follow.
// 'fragment_chunk_size' is the previously validated, padded chunk size.
static ParseStatus ParseFragment(WebPDemuxer* const dmux,
uint32_t fragment_chunk_size) {
const int frame_num = 1; // All fragments belong to the 1st (and only) frame.
const int has_fragments = !!(dmux->feature_flags_ & FRAGMENTS_FLAG);
const uint32_t frgm_payload_size = fragment_chunk_size - FRGM_CHUNK_SIZE;
int added_fragment = 0;
MemBuffer* const mem = &dmux->mem_;
Frame* frame;
ParseStatus status =
NewFrame(mem, min_size, TILE_CHUNK_SIZE, tile_chunk_size, &frame);
NewFrame(mem, FRGM_CHUNK_SIZE, fragment_chunk_size, &frame);
if (status != PARSE_OK) return status;
frame->is_tile_ = 1;
frame->x_offset_ = 2 * GetLE24s(mem);
frame->y_offset_ = 2 * GetLE24s(mem);
Skip(mem, tile_chunk_size - TILE_CHUNK_SIZE); // skip any trailing data.
// Store a (potentially partial) tile only if the tile flag is set
// and the tile contains some data.
status = StoreFrame(dmux->num_frames_, mem, frame);
if (status != PARSE_ERROR && has_tiles && frame->frame_num_ > 0) {
// Note num_frames_ is incremented only when all tiles have been consumed.
added_tile = AddFrame(dmux, frame);
if (!added_tile) status = PARSE_ERROR;
frame->is_fragment_ = 1;
frame->x_offset_ = 2 * ReadLE24s(mem);
frame->y_offset_ = 2 * ReadLE24s(mem);
// Store a fragment only if the fragments flag is set and there is some data
// available for this fragment.
status = StoreFrame(frame_num, frgm_payload_size, mem, frame, NULL);
if (status != PARSE_ERROR && has_fragments && frame->frame_num_ > 0) {
added_fragment = AddFrame(dmux, frame);
if (!added_fragment) {
status = PARSE_ERROR;
} else {
dmux->num_frames_ = 1;
}
}
if (!added_tile) free(frame);
if (!added_fragment) free(frame);
return status;
}
#endif // WEBP_EXPERIMENTAL_FEATURES
// General chunk storage starting with the header at 'start_offset' allowing
// General chunk storage, starting with the header at 'start_offset', allowing
// the user to request the payload via a fourcc string. 'size' includes the
// header and the unpadded payload size.
// Returns true on success, false otherwise.
@ -384,7 +402,7 @@ static int ReadHeader(MemBuffer* const mem) {
return 0;
}
riff_size = ReadLE32(GetBuffer(mem) + TAG_SIZE);
riff_size = GetLE32(GetBuffer(mem) + TAG_SIZE);
if (riff_size < CHUNK_HEADER_SIZE) return 0;
if (riff_size > MAX_CHUNK_PAYLOAD) return 0;
@ -403,6 +421,7 @@ static ParseStatus ParseSingleImage(WebPDemuxer* const dmux) {
MemBuffer* const mem = &dmux->mem_;
Frame* frame;
ParseStatus status;
int has_vp8l_alpha = 0; // Frame contains a lossless image with alpha.
if (dmux->frames_ != NULL) return PARSE_ERROR;
if (SizeIsInvalid(mem, min_size)) return PARSE_ERROR;
@ -411,7 +430,10 @@ static ParseStatus ParseSingleImage(WebPDemuxer* const dmux) {
frame = (Frame*)calloc(1, sizeof(*frame));
if (frame == NULL) return PARSE_ERROR;
status = StoreFrame(1, &dmux->mem_, frame);
// For the single image case we allow parsing of a partial frame, but we need
// at least CHUNK_HEADER_SIZE for parsing.
status = StoreFrame(1, CHUNK_HEADER_SIZE, &dmux->mem_, frame,
&has_vp8l_alpha);
if (status != PARSE_ERROR) {
const int has_alpha = !!(dmux->feature_flags_ & ALPHA_FLAG);
// Clear any alpha when the alpha flag is missing.
@ -421,10 +443,12 @@ static ParseStatus ParseSingleImage(WebPDemuxer* const dmux) {
}
// Use the frame width/height as the canvas values for non-vp8x files.
// Also, set ALPHA_FLAG if this is a lossless image with alpha.
if (!dmux->is_ext_format_ && frame->width_ > 0 && frame->height_ > 0) {
dmux->state_ = WEBP_DEMUX_PARSED_HEADER;
dmux->canvas_width_ = frame->width_;
dmux->canvas_height_ = frame->height_;
dmux->feature_flags_ |= has_vp8l_alpha ? ALPHA_FLAG : 0;
}
AddFrame(dmux, frame);
dmux->num_frames_ = 1;
@ -437,7 +461,7 @@ static ParseStatus ParseSingleImage(WebPDemuxer* const dmux) {
static ParseStatus ParseVP8X(WebPDemuxer* const dmux) {
MemBuffer* const mem = &dmux->mem_;
int loop_chunks = 0;
int anim_chunks = 0;
uint32_t vp8x_size;
ParseStatus status = PARSE_OK;
@ -445,17 +469,17 @@ static ParseStatus ParseVP8X(WebPDemuxer* const dmux) {
dmux->is_ext_format_ = 1;
Skip(mem, TAG_SIZE); // VP8X
vp8x_size = GetLE32(mem);
vp8x_size = ReadLE32(mem);
if (vp8x_size > MAX_CHUNK_PAYLOAD) return PARSE_ERROR;
if (vp8x_size < VP8X_CHUNK_SIZE) return PARSE_ERROR;
vp8x_size += vp8x_size & 1;
if (SizeIsInvalid(mem, vp8x_size)) return PARSE_ERROR;
if (MemDataSize(mem) < vp8x_size) return PARSE_NEED_MORE_DATA;
dmux->feature_flags_ = GetByte(mem);
dmux->feature_flags_ = ReadByte(mem);
Skip(mem, 3); // Reserved.
dmux->canvas_width_ = 1 + GetLE24s(mem);
dmux->canvas_height_ = 1 + GetLE24s(mem);
dmux->canvas_width_ = 1 + ReadLE24s(mem);
dmux->canvas_height_ = 1 + ReadLE24s(mem);
if (dmux->canvas_width_ * (uint64_t)dmux->canvas_height_ >= MAX_IMAGE_AREA) {
return PARSE_ERROR; // image final dimension is too large
}
@ -468,8 +492,8 @@ static ParseStatus ParseVP8X(WebPDemuxer* const dmux) {
do {
int store_chunk = 1;
const size_t chunk_start_offset = mem->start_;
const uint32_t fourcc = GetLE32(mem);
const uint32_t chunk_size = GetLE32(mem);
const uint32_t fourcc = ReadLE32(mem);
const uint32_t chunk_size = ReadLE32(mem);
const uint32_t chunk_size_padded = chunk_size + (chunk_size & 1);
if (chunk_size > MAX_CHUNK_PAYLOAD) return PARSE_ERROR;
@ -482,40 +506,50 @@ static ParseStatus ParseVP8X(WebPDemuxer* const dmux) {
case MKFOURCC('A', 'L', 'P', 'H'):
case MKFOURCC('V', 'P', '8', ' '):
case MKFOURCC('V', 'P', '8', 'L'): {
// check that this isn't an animation (all frames should be in an ANMF).
if (anim_chunks > 0) return PARSE_ERROR;
Rewind(mem, CHUNK_HEADER_SIZE);
status = ParseSingleImage(dmux);
break;
}
case MKFOURCC('L', 'O', 'O', 'P'): {
if (chunk_size_padded < LOOP_CHUNK_SIZE) return PARSE_ERROR;
case MKFOURCC('A', 'N', 'I', 'M'): {
if (chunk_size_padded < ANIM_CHUNK_SIZE) return PARSE_ERROR;
if (MemDataSize(mem) < chunk_size_padded) {
status = PARSE_NEED_MORE_DATA;
} else if (loop_chunks == 0) {
++loop_chunks;
dmux->loop_count_ = GetLE16s(mem);
Skip(mem, chunk_size_padded - LOOP_CHUNK_SIZE);
} else if (anim_chunks == 0) {
++anim_chunks;
dmux->bgcolor_ = ReadLE32(mem);
dmux->loop_count_ = ReadLE16s(mem);
Skip(mem, chunk_size_padded - ANIM_CHUNK_SIZE);
} else {
store_chunk = 0;
goto Skip;
}
break;
}
case MKFOURCC('F', 'R', 'M', ' '): {
status = ParseFrame(dmux, chunk_size_padded);
case MKFOURCC('A', 'N', 'M', 'F'): {
if (anim_chunks == 0) return PARSE_ERROR; // 'ANIM' precedes frames.
status = ParseAnimationFrame(dmux, chunk_size_padded);
break;
}
case MKFOURCC('T', 'I', 'L', 'E'): {
if (dmux->num_frames_ == 0) dmux->num_frames_ = 1;
status = ParseTile(dmux, chunk_size_padded);
#ifdef WEBP_EXPERIMENTAL_FEATURES
case MKFOURCC('F', 'R', 'G', 'M'): {
status = ParseFragment(dmux, chunk_size_padded);
break;
}
#endif
case MKFOURCC('I', 'C', 'C', 'P'): {
store_chunk = !!(dmux->feature_flags_ & ICCP_FLAG);
goto Skip;
}
case MKFOURCC('M', 'E', 'T', 'A'): {
store_chunk = !!(dmux->feature_flags_ & META_FLAG);
case MKFOURCC('X', 'M', 'P', ' '): {
store_chunk = !!(dmux->feature_flags_ & XMP_FLAG);
goto Skip;
}
case MKFOURCC('E', 'X', 'I', 'F'): {
store_chunk = !!(dmux->feature_flags_ & EXIF_FLAG);
goto Skip;
}
Skip:
@ -561,7 +595,7 @@ static int IsValidSimpleFormat(const WebPDemuxer* const dmux) {
}
static int IsValidExtendedFormat(const WebPDemuxer* const dmux) {
const int has_tiles = !!(dmux->feature_flags_ & TILE_FLAG);
const int has_fragments = !!(dmux->feature_flags_ & FRAGMENTS_FLAG);
const int has_frames = !!(dmux->feature_flags_ & ANIMATION_FLAG);
const Frame* f;
@ -573,15 +607,15 @@ static int IsValidExtendedFormat(const WebPDemuxer* const dmux) {
for (f = dmux->frames_; f != NULL; f = f->next_) {
const int cur_frame_set = f->frame_num_;
int frame_count = 0, tile_count = 0;
int frame_count = 0, fragment_count = 0;
// Check frame properties and if the image is composed of tiles that each
// fragment came from a 'TILE'.
// Check frame properties and, if the image is composed of fragments, that
// each fragment came from a 'FRGM' chunk.
for (; f != NULL && f->frame_num_ == cur_frame_set; f = f->next_) {
const ChunkData* const image = f->img_components_;
const ChunkData* const alpha = f->img_components_ + 1;
if (!has_tiles && f->is_tile_) return 0;
if (!has_fragments && f->is_fragment_) return 0;
if (!has_frames && f->frame_num_ > 1) return 0;
if (f->x_offset_ < 0 || f->y_offset_ < 0) return 0;
if (f->complete_) {
@ -593,6 +627,9 @@ static int IsValidExtendedFormat(const WebPDemuxer* const dmux) {
if (f->width_ <= 0 || f->height_ <= 0) return 0;
} else {
// There shouldn't be a partial frame in a complete file.
if (dmux->state_ == WEBP_DEMUX_DONE) return 0;
// Ensure alpha precedes image bitstream.
if (alpha->size_ > 0 && image->size_ > 0 &&
alpha->offset_ > image->offset_) {
@ -602,11 +639,11 @@ static int IsValidExtendedFormat(const WebPDemuxer* const dmux) {
if (f->next_ != NULL) return 0;
}
tile_count += f->is_tile_;
fragment_count += f->is_fragment_;
++frame_count;
}
if (!has_tiles && frame_count > 1) return 0;
if (tile_count > 0 && frame_count != tile_count) return 0;
if (!has_fragments && frame_count > 1) return 0;
if (fragment_count > 0 && frame_count != fragment_count) return 0;
if (f == NULL) break;
}
return 1;
@ -618,8 +655,10 @@ static int IsValidExtendedFormat(const WebPDemuxer* const dmux) {
// Puts 'dmux' into its initial parsing state and copies the buffer
// bookkeeping from 'mem' into it.
static void InitDemux(WebPDemuxer* const dmux, const MemBuffer* const mem) {
  // Input buffer description (copied by value).
  dmux->mem_ = *mem;

  // The tail pointer starts out referring to the head pointer of the list.
  dmux->frames_tail_ = &dmux->frames_;

  // Canvas dimensions start at -1.
  dmux->canvas_height_ = -1;
  dmux->canvas_width_ = -1;

  // Animation defaults.
  dmux->bgcolor_ = 0xFFFFFFFF;  // White background by default.
  dmux->loop_count_ = 1;

  dmux->state_ = WEBP_DEMUX_PARSING_HEADER;
}
@ -632,9 +671,9 @@ WebPDemuxer* WebPDemuxInternal(const WebPData* data, int allow_partial,
WebPDemuxer* dmux;
if (WEBP_ABI_IS_INCOMPATIBLE(version, WEBP_DEMUX_ABI_VERSION)) return NULL;
if (data == NULL || data->bytes_ == NULL || data->size_ == 0) return NULL;
if (data == NULL || data->bytes == NULL || data->size == 0) return NULL;
if (!InitMemBuffer(&mem, data->bytes_, data->size_)) return NULL;
if (!InitMemBuffer(&mem, data->bytes, data->size)) return NULL;
if (!ReadHeader(&mem)) return NULL;
partial = (mem.buf_size_ < mem.riff_end_);
@ -648,6 +687,7 @@ WebPDemuxer* WebPDemuxInternal(const WebPData* data, int allow_partial,
if (!memcmp(parser->id, GetBuffer(&dmux->mem_), TAG_SIZE)) {
status = parser->parse(dmux);
if (status == PARSE_OK) dmux->state_ = WEBP_DEMUX_DONE;
if (status == PARSE_NEED_MORE_DATA && !partial) status = PARSE_ERROR;
if (status != PARSE_ERROR && !parser->valid(dmux)) status = PARSE_ERROR;
break;
}
@ -685,10 +725,12 @@ uint32_t WebPDemuxGetI(const WebPDemuxer* dmux, WebPFormatFeature feature) {
if (dmux == NULL) return 0;
switch (feature) {
case WEBP_FF_FORMAT_FLAGS: return dmux->feature_flags_;
case WEBP_FF_CANVAS_WIDTH: return (uint32_t)dmux->canvas_width_;
case WEBP_FF_CANVAS_HEIGHT: return (uint32_t)dmux->canvas_height_;
case WEBP_FF_LOOP_COUNT: return (uint32_t)dmux->loop_count_;
case WEBP_FF_FORMAT_FLAGS: return dmux->feature_flags_;
case WEBP_FF_CANVAS_WIDTH: return (uint32_t)dmux->canvas_width_;
case WEBP_FF_CANVAS_HEIGHT: return (uint32_t)dmux->canvas_height_;
case WEBP_FF_LOOP_COUNT: return (uint32_t)dmux->loop_count_;
case WEBP_FF_BACKGROUND_COLOR: return dmux->bgcolor_;
case WEBP_FF_FRAME_COUNT: return (uint32_t)dmux->num_frames_;
}
return 0;
}
@ -696,7 +738,8 @@ uint32_t WebPDemuxGetI(const WebPDemuxer* dmux, WebPFormatFeature feature) {
// -----------------------------------------------------------------------------
// Frame iteration
// Find the first 'frame_num' frame. There may be multiple in a tiled frame.
// Find the first 'frame_num' frame. There may be multiple such frames in a
// fragmented frame.
static const Frame* GetFrame(const WebPDemuxer* const dmux, int frame_num) {
const Frame* f;
for (f = dmux->frames_; f != NULL; f = f->next_) {
@ -705,19 +748,19 @@ static const Frame* GetFrame(const WebPDemuxer* const dmux, int frame_num) {
return f;
}
// Returns tile 'tile_num' and the total count.
static const Frame* GetTile(
const Frame* const frame_set, int tile_num, int* const count) {
// Returns fragment 'fragment_num' and the total count.
static const Frame* GetFragment(
const Frame* const frame_set, int fragment_num, int* const count) {
const int this_frame = frame_set->frame_num_;
const Frame* f = frame_set;
const Frame* tile = NULL;
const Frame* fragment = NULL;
int total;
for (total = 0; f != NULL && f->frame_num_ == this_frame; f = f->next_) {
if (++total == tile_num) tile = f;
if (++total == fragment_num) fragment = f;
}
*count = total;
return tile;
return fragment;
}
static const uint8_t* GetFramePayload(const uint8_t* const mem_buf,
@ -747,27 +790,31 @@ static const uint8_t* GetFramePayload(const uint8_t* const mem_buf,
// Create a whole 'frame' from VP8 (+ alpha) or lossless.
static int SynthesizeFrame(const WebPDemuxer* const dmux,
const Frame* const first_frame,
int tile_num, WebPIterator* const iter) {
int fragment_num, WebPIterator* const iter) {
const uint8_t* const mem_buf = dmux->mem_.buf_;
int num_tiles;
int num_fragments;
size_t payload_size = 0;
const Frame* const tile = GetTile(first_frame, tile_num, &num_tiles);
const uint8_t* const payload = GetFramePayload(mem_buf, tile, &payload_size);
const Frame* const fragment =
GetFragment(first_frame, fragment_num, &num_fragments);
const uint8_t* const payload =
GetFramePayload(mem_buf, fragment, &payload_size);
if (payload == NULL) return 0;
iter->frame_num_ = first_frame->frame_num_;
iter->num_frames_ = dmux->num_frames_;
iter->tile_num_ = tile_num;
iter->num_tiles_ = num_tiles;
iter->x_offset_ = tile->x_offset_;
iter->y_offset_ = tile->y_offset_;
iter->width_ = tile->width_;
iter->height_ = tile->height_;
iter->duration_ = tile->duration_;
iter->complete_ = tile->complete_;
iter->tile_.bytes_ = payload;
iter->tile_.size_ = payload_size;
// TODO(jzern): adjust offsets for 'TILE's embedded in 'FRM 's
assert(first_frame != NULL);
iter->frame_num = first_frame->frame_num_;
iter->num_frames = dmux->num_frames_;
iter->fragment_num = fragment_num;
iter->num_fragments = num_fragments;
iter->x_offset = fragment->x_offset_;
iter->y_offset = fragment->y_offset_;
iter->width = fragment->width_;
iter->height = fragment->height_;
iter->duration = fragment->duration_;
iter->dispose_method = fragment->dispose_method_;
iter->complete = fragment->complete_;
iter->fragment.bytes = payload;
iter->fragment.size = payload_size;
// TODO(jzern): adjust offsets for 'FRGM's embedded in 'ANMF's
return 1;
}
@ -779,6 +826,8 @@ static int SetFrame(int frame_num, WebPIterator* const iter) {
if (frame_num == 0) frame_num = dmux->num_frames_;
frame = GetFrame(dmux, frame_num);
if (frame == NULL) return 0;
return SynthesizeFrame(dmux, frame, 1, iter);
}
@ -792,22 +841,22 @@ int WebPDemuxGetFrame(const WebPDemuxer* dmux, int frame, WebPIterator* iter) {
int WebPDemuxNextFrame(WebPIterator* iter) {
if (iter == NULL) return 0;
return SetFrame(iter->frame_num_ + 1, iter);
return SetFrame(iter->frame_num + 1, iter);
}
int WebPDemuxPrevFrame(WebPIterator* iter) {
if (iter == NULL) return 0;
if (iter->frame_num_ <= 1) return 0;
return SetFrame(iter->frame_num_ - 1, iter);
if (iter->frame_num <= 1) return 0;
return SetFrame(iter->frame_num - 1, iter);
}
int WebPDemuxSelectTile(WebPIterator* iter, int tile) {
if (iter != NULL && iter->private_ != NULL && tile > 0) {
int WebPDemuxSelectFragment(WebPIterator* iter, int fragment_num) {
if (iter != NULL && iter->private_ != NULL && fragment_num > 0) {
const WebPDemuxer* const dmux = (WebPDemuxer*)iter->private_;
const Frame* const frame = GetFrame(dmux, iter->frame_num_);
const Frame* const frame = GetFrame(dmux, iter->frame_num);
if (frame == NULL) return 0;
return SynthesizeFrame(dmux, frame, tile, iter);
return SynthesizeFrame(dmux, frame, fragment_num, iter);
}
return 0;
}
@ -856,10 +905,10 @@ static int SetChunk(const char fourcc[4], int chunk_num,
if (chunk_num <= count) {
const uint8_t* const mem_buf = dmux->mem_.buf_;
const Chunk* const chunk = GetChunk(dmux, fourcc, chunk_num);
iter->chunk_.bytes_ = mem_buf + chunk->data_.offset_ + CHUNK_HEADER_SIZE;
iter->chunk_.size_ = chunk->data_.size_ - CHUNK_HEADER_SIZE;
iter->num_chunks_ = count;
iter->chunk_num_ = chunk_num;
iter->chunk.bytes = mem_buf + chunk->data_.offset_ + CHUNK_HEADER_SIZE;
iter->chunk.size = chunk->data_.size_ - CHUNK_HEADER_SIZE;
iter->num_chunks = count;
iter->chunk_num = chunk_num;
return 1;
}
return 0;
@ -878,17 +927,17 @@ int WebPDemuxGetChunk(const WebPDemuxer* dmux,
int WebPDemuxNextChunk(WebPChunkIterator* iter) {
if (iter != NULL) {
const char* const fourcc =
(const char*)iter->chunk_.bytes_ - CHUNK_HEADER_SIZE;
return SetChunk(fourcc, iter->chunk_num_ + 1, iter);
(const char*)iter->chunk.bytes - CHUNK_HEADER_SIZE;
return SetChunk(fourcc, iter->chunk_num + 1, iter);
}
return 0;
}
int WebPDemuxPrevChunk(WebPChunkIterator* iter) {
if (iter != NULL && iter->chunk_num_ > 1) {
if (iter != NULL && iter->chunk_num > 1) {
const char* const fourcc =
(const char*)iter->chunk_.bytes_ - CHUNK_HEADER_SIZE;
return SetChunk(fourcc, iter->chunk_num_ - 1, iter);
(const char*)iter->chunk.bytes - CHUNK_HEADER_SIZE;
return SetChunk(fourcc, iter->chunk_num - 1, iter);
}
return 0;
}

@ -426,11 +426,16 @@ static void HE8uv(uint8_t *dst) { // horizontal
}
// helper for chroma-DC predictions
static WEBP_INLINE void Put8x8uv(uint64_t v, uint8_t* dst) {
static WEBP_INLINE void Put8x8uv(uint8_t value, uint8_t* dst) {
int j;
#ifndef WEBP_REFERENCE_IMPLEMENTATION
const uint64_t v = (uint64_t)value * 0x0101010101010101ULL;
for (j = 0; j < 8; ++j) {
*(uint64_t*)(dst + j * BPS) = v;
}
#else
for (j = 0; j < 8; ++j) memset(dst + j * BPS, value, 8);
#endif
}
static void DC8uv(uint8_t *dst) { // DC
@ -439,7 +444,7 @@ static void DC8uv(uint8_t *dst) { // DC
for (i = 0; i < 8; ++i) {
dc0 += dst[i - BPS] + dst[-1 + i * BPS];
}
Put8x8uv((uint64_t)((dc0 >> 4) * 0x0101010101010101ULL), dst);
Put8x8uv(dc0 >> 4, dst);
}
static void DC8uvNoLeft(uint8_t *dst) { // DC with no left samples
@ -448,7 +453,7 @@ static void DC8uvNoLeft(uint8_t *dst) { // DC with no left samples
for (i = 0; i < 8; ++i) {
dc0 += dst[i - BPS];
}
Put8x8uv((uint64_t)((dc0 >> 3) * 0x0101010101010101ULL), dst);
Put8x8uv(dc0 >> 3, dst);
}
static void DC8uvNoTop(uint8_t *dst) { // DC with no top samples
@ -457,11 +462,11 @@ static void DC8uvNoTop(uint8_t *dst) { // DC with no top samples
for (i = 0; i < 8; ++i) {
dc0 += dst[-1 + i * BPS];
}
Put8x8uv((uint64_t)((dc0 >> 3) * 0x0101010101010101ULL), dst);
Put8x8uv(dc0 >> 3, dst);
}
static void DC8uvNoTopLeft(uint8_t *dst) { // DC with nothing
Put8x8uv(0x8080808080808080ULL, dst);
Put8x8uv(0x80, dst);
}
//------------------------------------------------------------------------------

@ -79,7 +79,7 @@ extern "C" {
"vld4.8 {" #c1"[6], " #c2"[6], " #c3"[6], " #c4"[6]}," #b1 "," #stride"\n" \
"vld4.8 {" #c1"[7], " #c2"[7], " #c3"[7], " #c4"[7]}," #b2 "," #stride"\n"
#define STORE8x2(c1, c2, p,stride) \
#define STORE8x2(c1, c2, p, stride) \
"vst2.8 {" #c1"[0], " #c2"[0]}," #p "," #stride " \n" \
"vst2.8 {" #c1"[1], " #c2"[1]}," #p "," #stride " \n" \
"vst2.8 {" #c1"[2], " #c2"[2]}," #p "," #stride " \n" \
@ -155,6 +155,9 @@ static void SimpleHFilter16iNEON(uint8_t* p, int stride, int thresh) {
}
}
//-----------------------------------------------------------------------------
// Inverse transforms (Paragraph 14.4)
static void TransformOneNEON(const int16_t *in, uint8_t *dst) {
const int kBPS = BPS;
const int16_t constants[] = {20091, 17734, 0, 0};
@ -311,6 +314,73 @@ static void TransformTwoNEON(const int16_t* in, uint8_t* dst, int do_two) {
}
}
// NEON inline-assembly WHT; installed as VP8TransformWHT by VP8DspInitNEON().
//
// 'in':  16 int16_t coefficients, read with a single 32-byte vld1.16.
// 'out': receives 16 int16_t results. Each result is written with a
//        single-lane vst1.16 that post-increments 'out' by kStep (32) bytes,
//        so consecutive results are not adjacent in memory.
//
// The first pass widens to 32 bits and computes the add/sub butterflies; the
// data is then transposed and the second pass adds a rounding constant of 3
// before narrowing with a right shift by 3.
static void TransformWHT(const int16_t* in, int16_t* out) {
const int kStep = 32; // The store is only incrementing the pointer as if we
// had stored a single byte.
__asm__ volatile (
// part 1
// load data into q0, q1
"vld1.16 {q0, q1}, [%[in]] \n"
"vaddl.s16 q2, d0, d3 \n" // a0 = in[0] + in[12]
"vaddl.s16 q3, d1, d2 \n" // a1 = in[4] + in[8]
"vsubl.s16 q4, d1, d2 \n" // a2 = in[4] - in[8]
"vsubl.s16 q5, d0, d3 \n" // a3 = in[0] - in[12]
"vadd.s32 q0, q2, q3 \n" // tmp[0] = a0 + a1
"vsub.s32 q2, q2, q3 \n" // tmp[8] = a0 - a1
"vadd.s32 q1, q5, q4 \n" // tmp[4] = a3 + a2
"vsub.s32 q3, q5, q4 \n" // tmp[12] = a3 - a2
// Transpose
// q0 = tmp[0, 4, 8, 12], q1 = tmp[2, 6, 10, 14]
// q2 = tmp[1, 5, 9, 13], q3 = tmp[3, 7, 11, 15]
"vswp d1, d4 \n" // vtrn.64 q0, q2
"vswp d3, d6 \n" // vtrn.64 q1, q3
"vtrn.32 q0, q1 \n"
"vtrn.32 q2, q3 \n"
// second pass on the transposed data, with rounding (+3) and >> 3
"vmov.s32 q4, #3 \n" // dc = 3
"vadd.s32 q0, q0, q4 \n" // dc = tmp[0] + 3
"vadd.s32 q6, q0, q3 \n" // a0 = dc + tmp[3]
"vadd.s32 q7, q1, q2 \n" // a1 = tmp[1] + tmp[2]
"vsub.s32 q8, q1, q2 \n" // a2 = tmp[1] - tmp[2]
"vsub.s32 q9, q0, q3 \n" // a3 = dc - tmp[3]
"vadd.s32 q0, q6, q7 \n"
"vshrn.s32 d0, q0, #3 \n" // (a0 + a1) >> 3
"vadd.s32 q1, q9, q8 \n"
"vshrn.s32 d1, q1, #3 \n" // (a3 + a2) >> 3
"vsub.s32 q2, q6, q7 \n"
"vshrn.s32 d2, q2, #3 \n" // (a0 - a1) >> 3
"vsub.s32 q3, q9, q8 \n"
"vshrn.s32 d3, q3, #3 \n" // (a3 - a2) >> 3
// set the results to output
// (one 16-bit lane per store; 'out' advances by kStep after each one)
"vst1.16 d0[0], [%[out]], %[kStep] \n"
"vst1.16 d1[0], [%[out]], %[kStep] \n"
"vst1.16 d2[0], [%[out]], %[kStep] \n"
"vst1.16 d3[0], [%[out]], %[kStep] \n"
"vst1.16 d0[1], [%[out]], %[kStep] \n"
"vst1.16 d1[1], [%[out]], %[kStep] \n"
"vst1.16 d2[1], [%[out]], %[kStep] \n"
"vst1.16 d3[1], [%[out]], %[kStep] \n"
"vst1.16 d0[2], [%[out]], %[kStep] \n"
"vst1.16 d1[2], [%[out]], %[kStep] \n"
"vst1.16 d2[2], [%[out]], %[kStep] \n"
"vst1.16 d3[2], [%[out]], %[kStep] \n"
"vst1.16 d0[3], [%[out]], %[kStep] \n"
"vst1.16 d1[3], [%[out]], %[kStep] \n"
"vst1.16 d2[3], [%[out]], %[kStep] \n"
"vst1.16 d3[3], [%[out]], %[kStep] \n"
// 'out' is an in/out operand because the post-indexed stores modify it.
: [out] "+r"(out) // modified registers
: [in] "r"(in), [kStep] "r"(kStep) // constants
: "memory", "q0", "q1", "q2", "q3", "q4",
"q5", "q6", "q7", "q8", "q9" // clobbered
);
}
#endif // WEBP_USE_NEON
//------------------------------------------------------------------------------
@ -321,6 +391,7 @@ extern void VP8DspInitNEON(void);
void VP8DspInitNEON(void) {
#if defined(WEBP_USE_NEON)
VP8Transform = TransformTwoNEON;
VP8TransformWHT = TransformWHT;
VP8SimpleVFilter16 = SimpleVFilter16NEON;
VP8SimpleHFilter16 = SimpleHFilter16NEON;

@ -194,7 +194,7 @@ static void TransformSSE2(const int16_t* in, uint8_t* dst, int do_two) {
// Add inverse transform to 'dst' and store.
{
const __m128i zero = _mm_set1_epi16(0);
const __m128i zero = _mm_setzero_si128();
// Load the reference(s).
__m128i dst0, dst1, dst2, dst3;
if (do_two) {
@ -278,14 +278,14 @@ static void TransformSSE2(const int16_t* in, uint8_t* dst, int do_two) {
#define GET_NOTHEV(p1, p0, q0, q1, hev_thresh, not_hev) { \
const __m128i zero = _mm_setzero_si128(); \
const __m128i t1 = MM_ABS(p1, p0); \
const __m128i t2 = MM_ABS(q1, q0); \
const __m128i t_1 = MM_ABS(p1, p0); \
const __m128i t_2 = MM_ABS(q1, q0); \
\
const __m128i h = _mm_set1_epi8(hev_thresh); \
const __m128i t3 = _mm_subs_epu8(t1, h); /* abs(p1 - p0) - hev_tresh */ \
const __m128i t4 = _mm_subs_epu8(t2, h); /* abs(q1 - q0) - hev_tresh */ \
const __m128i t_3 = _mm_subs_epu8(t_1, h); /* abs(p1 - p0) - hev_tresh */ \
const __m128i t_4 = _mm_subs_epu8(t_2, h); /* abs(q1 - q0) - hev_tresh */ \
\
not_hev = _mm_or_si128(t3, t4); \
not_hev = _mm_or_si128(t_3, t_4); \
not_hev = _mm_cmpeq_epi8(not_hev, zero); /* not_hev <= t1 && not_hev <= t2 */\
}
@ -314,13 +314,13 @@ static void TransformSSE2(const int16_t* in, uint8_t* dst, int do_two) {
// Updates values of 2 pixels at MB edge during complex filtering.
// Update operations:
// q = q - a and p = p + a; where a = [(a_hi >> 7), (a_lo >> 7)]
// q = q - delta and p = p + delta; where delta = [(a_hi >> 7), (a_lo >> 7)]
#define UPDATE_2PIXELS(pi, qi, a_lo, a_hi) { \
const __m128i a_lo7 = _mm_srai_epi16(a_lo, 7); \
const __m128i a_hi7 = _mm_srai_epi16(a_hi, 7); \
const __m128i a = _mm_packs_epi16(a_lo7, a_hi7); \
pi = _mm_adds_epi8(pi, a); \
qi = _mm_subs_epi8(qi, a); \
const __m128i delta = _mm_packs_epi16(a_lo7, a_hi7); \
pi = _mm_adds_epi8(pi, delta); \
qi = _mm_subs_epi8(qi, delta); \
}
static void NeedsFilter(const __m128i* p1, const __m128i* p0, const __m128i* q0,

@ -49,8 +49,6 @@ extern VP8CPUInfo VP8GetCPUInfo;
//------------------------------------------------------------------------------
// Encoding
int VP8GetAlpha(const int histo[]);
// Transforms
// VP8Idct: Does one of two inverse transforms. If do_two is set, the transforms
// will be done for (ref, in, dst) and (ref + 4, in + 16, dst + 4).
@ -85,10 +83,11 @@ typedef int (*VP8QuantizeBlock)(int16_t in[16], int16_t out[16],
int n, const struct VP8Matrix* const mtx);
extern VP8QuantizeBlock VP8EncQuantizeBlock;
// Compute susceptibility based on DCT-coeff histograms:
// the higher, the "easier" the macroblock is to compress.
typedef int (*VP8CHisto)(const uint8_t* ref, const uint8_t* pred,
int start_block, int end_block);
// Collect histogram for susceptibility calculation and accumulate in histo[].
struct VP8Histogram;
typedef void (*VP8CHisto)(const uint8_t* ref, const uint8_t* pred,
int start_block, int end_block,
struct VP8Histogram* const histo);
extern const int VP8DspScan[16 + 4 + 4];
extern VP8CHisto VP8CollectHistogram;
@ -104,7 +103,7 @@ extern VP8DecIdct2 VP8Transform;
extern VP8DecIdct VP8TransformUV;
extern VP8DecIdct VP8TransformDC;
extern VP8DecIdct VP8TransformDCUV;
extern void (*VP8TransformWHT)(const int16_t* in, int16_t* out);
extern VP8WHT VP8TransformWHT;
// *dst is the destination block, with stride BPS. Boundary samples are
// assumed accessible when needed.
@ -159,6 +158,9 @@ extern WebPUpsampleLinePairFunc WebPUpsamplers[/* MODE_LAST */];
// Initializes SSE2 version of the fancy upsamplers.
void WebPInitUpsamplersSSE2(void);
// NEON version
void WebPInitUpsamplersNEON(void);
#endif // FANCY_UPSAMPLING
// Point-sampling methods.
@ -200,6 +202,7 @@ extern void (*WebPApplyAlphaMultiply4444)(
void WebPInitPremultiply(void);
void WebPInitPremultiplySSE2(void); // should not be called directly.
void WebPInitPremultiplyNEON(void);
//------------------------------------------------------------------------------

@ -17,31 +17,18 @@
extern "C" {
#endif
//------------------------------------------------------------------------------
// Compute susceptibility based on DCT-coeff histograms:
// the higher, the "easier" the macroblock is to compress.
static int ClipAlpha(int alpha) {
return alpha < 0 ? 0 : alpha > 255 ? 255 : alpha;
// Saturates 'v' to the range [0, 255]: negative values become 0, values
// above 255 become 255, anything else is returned unchanged.
static WEBP_INLINE uint8_t clip_8b(int v) {
  if (v < 0) return 0;
  if (v > 255) return 255;
  return (uint8_t)v;
}
int VP8GetAlpha(const int histo[MAX_COEFF_THRESH + 1]) {
int num = 0, den = 0, val = 0;
int k;
int alpha;
// note: changing this loop to avoid the numerous "k + 1" slows things down.
for (k = 0; k < MAX_COEFF_THRESH; ++k) {
if (histo[k + 1]) {
val += histo[k + 1];
num += val * (k + 1);
den += (k + 1) * (k + 1);
}
}
// we scale the value to a usable [0..255] range
alpha = den ? 10 * num / den - 5 : 0;
return ClipAlpha(alpha);
// Returns 'v' limited from above by 'max'; there is no lower bound.
static WEBP_INLINE int clip_max(int v, int max) {
  if (v > max) {
    return max;
  }
  return v;
}
//------------------------------------------------------------------------------
// Compute susceptibility based on DCT-coeff histograms:
// the higher, the "easier" the macroblock is to compress.
const int VP8DspScan[16 + 4 + 4] = {
// Luma
0 + 0 * BPS, 4 + 0 * BPS, 8 + 0 * BPS, 12 + 0 * BPS,
@ -53,27 +40,23 @@ const int VP8DspScan[16 + 4 + 4] = {
8 + 0 * BPS, 12 + 0 * BPS, 8 + 4 * BPS, 12 + 4 * BPS // V
};
static int CollectHistogram(const uint8_t* ref, const uint8_t* pred,
int start_block, int end_block) {
int histo[MAX_COEFF_THRESH + 1] = { 0 };
int16_t out[16];
int j, k;
static void CollectHistogram(const uint8_t* ref, const uint8_t* pred,
int start_block, int end_block,
VP8Histogram* const histo) {
int j;
for (j = start_block; j < end_block; ++j) {
VP8FTransform(ref + VP8DspScan[j], pred + VP8DspScan[j], out);
int k;
int16_t out[16];
// Convert coefficients to bin (within out[]).
for (k = 0; k < 16; ++k) {
const int v = abs(out[k]) >> 2;
out[k] = (v > MAX_COEFF_THRESH) ? MAX_COEFF_THRESH : v;
}
VP8FTransform(ref + VP8DspScan[j], pred + VP8DspScan[j], out);
// Use bin to update histogram.
// Convert coefficients to bin.
for (k = 0; k < 16; ++k) {
histo[out[k]]++;
const int v = abs(out[k]) >> 3; // TODO(skal): add rounding?
const int clipped_value = clip_max(v, MAX_COEFF_THRESH);
histo->distribution[clipped_value]++;
}
}
return VP8GetAlpha(histo);
}
//------------------------------------------------------------------------------
@ -89,15 +72,12 @@ static void InitTables(void) {
if (!tables_ok) {
int i;
for (i = -255; i <= 255 + 255; ++i) {
clip1[255 + i] = (i < 0) ? 0 : (i > 255) ? 255 : i;
clip1[255 + i] = clip_8b(i);
}
tables_ok = 1;
}
}
static WEBP_INLINE uint8_t clip_8b(int v) {
return (!(v & ~0xff)) ? v : v < 0 ? 0 : 255;
}
//------------------------------------------------------------------------------
// Transforms (Paragraph 14.4)
@ -154,25 +134,25 @@ static void FTransform(const uint8_t* src, const uint8_t* ref, int16_t* out) {
int i;
int tmp[16];
for (i = 0; i < 4; ++i, src += BPS, ref += BPS) {
const int d0 = src[0] - ref[0];
const int d0 = src[0] - ref[0]; // 9bit dynamic range ([-255,255])
const int d1 = src[1] - ref[1];
const int d2 = src[2] - ref[2];
const int d3 = src[3] - ref[3];
const int a0 = (d0 + d3) << 3;
const int a1 = (d1 + d2) << 3;
const int a2 = (d1 - d2) << 3;
const int a3 = (d0 - d3) << 3;
tmp[0 + i * 4] = (a0 + a1);
tmp[1 + i * 4] = (a2 * 2217 + a3 * 5352 + 14500) >> 12;
tmp[2 + i * 4] = (a0 - a1);
tmp[3 + i * 4] = (a3 * 2217 - a2 * 5352 + 7500) >> 12;
const int a0 = (d0 + d3); // 10b [-510,510]
const int a1 = (d1 + d2);
const int a2 = (d1 - d2);
const int a3 = (d0 - d3);
tmp[0 + i * 4] = (a0 + a1) << 3; // 14b [-8160,8160]
tmp[1 + i * 4] = (a2 * 2217 + a3 * 5352 + 1812) >> 9; // [-7536,7542]
tmp[2 + i * 4] = (a0 - a1) << 3;
tmp[3 + i * 4] = (a3 * 2217 - a2 * 5352 + 937) >> 9;
}
for (i = 0; i < 4; ++i) {
const int a0 = (tmp[0 + i] + tmp[12 + i]);
const int a0 = (tmp[0 + i] + tmp[12 + i]); // 15b
const int a1 = (tmp[4 + i] + tmp[ 8 + i]);
const int a2 = (tmp[4 + i] - tmp[ 8 + i]);
const int a3 = (tmp[0 + i] - tmp[12 + i]);
out[0 + i] = (a0 + a1 + 7) >> 4;
out[0 + i] = (a0 + a1 + 7) >> 4; // 12b
out[4 + i] = ((a2 * 2217 + a3 * 5352 + 12000) >> 16) + (a3 != 0);
out[8 + i] = (a0 - a1 + 7) >> 4;
out[12+ i] = ((a3 * 2217 - a2 * 5352 + 51000) >> 16);
@ -589,30 +569,30 @@ static int TTransform(const uint8_t* in, const uint16_t* w) {
int i;
// horizontal pass
for (i = 0; i < 4; ++i, in += BPS) {
const int a0 = (in[0] + in[2]) << 2;
const int a1 = (in[1] + in[3]) << 2;
const int a2 = (in[1] - in[3]) << 2;
const int a3 = (in[0] - in[2]) << 2;
tmp[0 + i * 4] = a0 + a1 + (a0 != 0);
const int a0 = in[0] + in[2];
const int a1 = in[1] + in[3];
const int a2 = in[1] - in[3];
const int a3 = in[0] - in[2];
tmp[0 + i * 4] = a0 + a1;
tmp[1 + i * 4] = a3 + a2;
tmp[2 + i * 4] = a3 - a2;
tmp[3 + i * 4] = a0 - a1;
}
// vertical pass
for (i = 0; i < 4; ++i, ++w) {
const int a0 = (tmp[0 + i] + tmp[8 + i]);
const int a1 = (tmp[4 + i] + tmp[12+ i]);
const int a2 = (tmp[4 + i] - tmp[12+ i]);
const int a3 = (tmp[0 + i] - tmp[8 + i]);
const int a0 = tmp[0 + i] + tmp[8 + i];
const int a1 = tmp[4 + i] + tmp[12+ i];
const int a2 = tmp[4 + i] - tmp[12+ i];
const int a3 = tmp[0 + i] - tmp[8 + i];
const int b0 = a0 + a1;
const int b1 = a3 + a2;
const int b2 = a3 - a2;
const int b3 = a0 - a1;
// abs((b + (b<0) + 3) >> 3) = (abs(b) + 3) >> 3
sum += w[ 0] * ((abs(b0) + 3) >> 3);
sum += w[ 4] * ((abs(b1) + 3) >> 3);
sum += w[ 8] * ((abs(b2) + 3) >> 3);
sum += w[12] * ((abs(b3) + 3) >> 3);
sum += w[ 0] * abs(b0);
sum += w[ 4] * abs(b1);
sum += w[ 8] * abs(b2);
sum += w[12] * abs(b3);
}
return sum;
}
@ -621,7 +601,7 @@ static int Disto4x4(const uint8_t* const a, const uint8_t* const b,
const uint16_t* const w) {
const int sum1 = TTransform(a, w);
const int sum2 = TTransform(b, w);
return (abs(sum2 - sum1) + 8) >> 4;
return abs(sum2 - sum1) >> 5;
}
static int Disto16x16(const uint8_t* const a, const uint8_t* const b,
@ -651,13 +631,13 @@ static int QuantizeBlock(int16_t in[16], int16_t out[16],
for (; n < 16; ++n) {
const int j = kZigzag[n];
const int sign = (in[j] < 0);
int coeff = (sign ? -in[j] : in[j]) + mtx->sharpen_[j];
if (coeff > 2047) coeff = 2047;
const int coeff = (sign ? -in[j] : in[j]) + mtx->sharpen_[j];
if (coeff > mtx->zthresh_[j]) {
const int Q = mtx->q_[j];
const int iQ = mtx->iq_[j];
const int B = mtx->bias_[j];
out[n] = QUANTDIV(coeff, iQ, B);
if (out[n] > MAX_LEVEL) out[n] = MAX_LEVEL;
if (sign) out[n] = -out[n];
in[j] = out[n] * Q;
if (out[n]) last = n;
@ -706,6 +686,7 @@ VP8QuantizeBlock VP8EncQuantizeBlock;
VP8BlockCopy VP8Copy4x4;
extern void VP8EncDspInitSSE2(void);
extern void VP8EncDspInitNEON(void);
void VP8EncDspInit(void) {
InitTables();
@ -734,6 +715,10 @@ void VP8EncDspInit(void) {
if (VP8GetCPUInfo(kSSE2)) {
VP8EncDspInitSSE2();
}
#elif defined(WEBP_USE_NEON)
if (VP8GetCPUInfo(kNEON)) {
VP8EncDspInitNEON();
}
#endif
}
}

@ -0,0 +1,661 @@
// Copyright 2012 Google Inc. All Rights Reserved.
//
// This code is licensed under the same terms as WebM:
// Software License Agreement: http://www.webmproject.org/license/software/
// Additional IP Rights Grant: http://www.webmproject.org/license/additional/
// -----------------------------------------------------------------------------
//
// ARM NEON version of speed-critical encoding functions.
//
// adapted from libvpx (http://www.webmproject.org/code/)
#include "./dsp.h"
#if defined(__cplusplus) || defined(c_plusplus)
extern "C" {
#endif
#if defined(WEBP_USE_NEON)
#include "../enc/vp8enci.h"
//------------------------------------------------------------------------------
// Transforms (Paragraph 14.4)
// Inverse transform of a single 4x4 block.
// Reads the 16 coefficients in[0..15], applies the two-pass inverse transform,
// adds the result to the 4x4 reference pixels at 'ref' (rows are BPS bytes
// apart) and stores the saturated 8-bit sum to 'dst' (same row stride).
// This code is pretty much the same as TransformOneNEON in the decoder, except
// that the prediction is read from *ref instead of *dst. See the comments there
// for algorithmic explanations.
static void ITransformOne(const uint8_t* ref,
                          const int16_t* in, uint8_t* dst) {
  const int kBPS = BPS;
  const int16_t kC1C2[] = { 20091, 17734, 0, 0 };  // kC1 / (kC2 >> 1) / 0 / 0

  __asm__ volatile (
    "vld1.16 {q1, q2}, [%[in]] \n"
    "vld1.16 {d0}, [%[kC1C2]] \n"

    // d2: in[0]
    // d3: in[8]
    // d4: in[4]
    // d5: in[12]
    "vswp d3, d4 \n"

    // q8 = {in[4], in[12]} * kC1 * 2 >> 16
    // q9 = {in[4], in[12]} * kC2 >> 16
    "vqdmulh.s16 q8, q2, d0[0] \n"
    "vqdmulh.s16 q9, q2, d0[1] \n"

    // d22 = a = in[0] + in[8]
    // d23 = b = in[0] - in[8]
    "vqadd.s16 d22, d2, d3 \n"
    "vqsub.s16 d23, d2, d3 \n"

    // q8 = in[4]/[12] * kC1 >> 16
    "vshr.s16 q8, q8, #1 \n"

    // Add {in[4], in[12]} back after the multiplication.
    "vqadd.s16 q8, q2, q8 \n"

    // d20 = c = in[4]*kC2 - in[12]*kC1
    // d21 = d = in[4]*kC1 + in[12]*kC2
    "vqsub.s16 d20, d18, d17 \n"
    "vqadd.s16 d21, d19, d16 \n"

    // d2 = tmp[0] = a + d
    // d3 = tmp[1] = b + c
    // d4 = tmp[2] = b - c
    // d5 = tmp[3] = a - d
    "vqadd.s16 d2, d22, d21 \n"
    "vqadd.s16 d3, d23, d20 \n"
    "vqsub.s16 d4, d23, d20 \n"
    "vqsub.s16 d5, d22, d21 \n"

    // Transpose the intermediate result before the second pass.
    "vzip.16 q1, q2 \n"
    "vzip.16 q1, q2 \n"

    "vswp d3, d4 \n"

    // q8 = {tmp[4], tmp[12]} * kC1 * 2 >> 16
    // q9 = {tmp[4], tmp[12]} * kC2 >> 16
    "vqdmulh.s16 q8, q2, d0[0] \n"
    "vqdmulh.s16 q9, q2, d0[1] \n"

    // d22 = a = tmp[0] + tmp[8]
    // d23 = b = tmp[0] - tmp[8]
    "vqadd.s16 d22, d2, d3 \n"
    "vqsub.s16 d23, d2, d3 \n"

    "vshr.s16 q8, q8, #1 \n"
    "vqadd.s16 q8, q2, q8 \n"

    // d20 = c = tmp[4]*kC2 - tmp[12]*kC1
    // d21 = d = tmp[4]*kC1 + tmp[12]*kC2
    "vqsub.s16 d20, d18, d17 \n"
    "vqadd.s16 d21, d19, d16 \n"

    // d2 = tmp[0] = a + d
    // d3 = tmp[1] = b + c
    // d4 = tmp[2] = b - c
    // d5 = tmp[3] = a - d
    "vqadd.s16 d2, d22, d21 \n"
    "vqadd.s16 d3, d23, d20 \n"
    "vqsub.s16 d4, d23, d20 \n"
    "vqsub.s16 d5, d22, d21 \n"

    // Load the four reference rows (4 bytes each) into d6/d7, i.e. q3.
    "vld1.32 d6[0], [%[ref]], %[kBPS] \n"
    "vld1.32 d6[1], [%[ref]], %[kBPS] \n"
    "vld1.32 d7[0], [%[ref]], %[kBPS] \n"
    "vld1.32 d7[1], [%[ref]], %[kBPS] \n"

    // Rewind 'ref' by the four post-increments above.
    "sub %[ref], %[ref], %[kBPS], lsl #2 \n"

    // (val) + 4 >> 3
    "vrshr.s16 d2, d2, #3 \n"
    "vrshr.s16 d3, d3, #3 \n"
    "vrshr.s16 d4, d4, #3 \n"
    "vrshr.s16 d5, d5, #3 \n"

    "vzip.16 q1, q2 \n"
    "vzip.16 q1, q2 \n"

    // Must accumulate before saturating
    "vmovl.u8 q8, d6 \n"
    "vmovl.u8 q9, d7 \n"

    "vqadd.s16 q1, q1, q8 \n"
    "vqadd.s16 q2, q2, q9 \n"

    "vqmovun.s16 d0, q1 \n"
    "vqmovun.s16 d1, q2 \n"

    "vst1.32 d0[0], [%[dst]], %[kBPS] \n"
    "vst1.32 d0[1], [%[dst]], %[kBPS] \n"
    "vst1.32 d1[0], [%[dst]], %[kBPS] \n"
    "vst1.32 d1[1], [%[dst]] \n"

    // 'ref' is written by the post-incrementing loads (and the final 'sub'),
    // so it must be declared as a read-write operand, not as an input.
    : [in] "+r"(in), [dst] "+r"(dst), [ref] "+r"(ref)  // modified registers
    : [kBPS] "r"(kBPS), [kC1C2] "r"(kC1C2)  // constants
    // q3 (= d6/d7) holds the reference pixels and must be listed as clobbered.
    : "memory", "q0", "q1", "q2", "q3", "q8", "q9", "q10", "q11"  // clobbered
  );
}
// Inverse-transforms one 4x4 block, or two horizontally adjacent ones when
// 'do_two' is non-zero. The second block uses the next 16 coefficients and is
// located 4 pixels to the right in both 'ref' and 'dst'.
static void ITransform(const uint8_t* ref,
                       const int16_t* in, uint8_t* dst, int do_two) {
  const int num_blocks = do_two ? 2 : 1;
  int n;
  for (n = 0; n < num_blocks; ++n) {
    ITransformOne(ref + 4 * n, in + 16 * n, dst + 4 * n);
  }
}
// Inverse Walsh-Hadamard transform.
// Reads the 16 coefficients in[0..15] and writes the 16 results to
// out[0], out[16], out[32], ... (one value every 16 int16_t).
// Same code as dec_neon.c
static void ITransformWHT(const int16_t* in, int16_t* out) {
const int kStep = 32; // Byte stride between two stores: each vst1.16 below
// writes a single int16_t lane and then post-increments
// 'out' by kStep bytes (= 16 int16_t).
__asm__ volatile (
// part 1
// load data into q0, q1
"vld1.16 {q0, q1}, [%[in]] \n"
"vaddl.s16 q2, d0, d3 \n" // a0 = in[0] + in[12]
"vaddl.s16 q3, d1, d2 \n" // a1 = in[4] + in[8]
"vsubl.s16 q4, d1, d2 \n" // a2 = in[4] - in[8]
"vsubl.s16 q5, d0, d3 \n" // a3 = in[0] - in[12]
"vadd.s32 q0, q2, q3 \n" // tmp[0] = a0 + a1
"vsub.s32 q2, q2, q3 \n" // tmp[8] = a0 - a1
"vadd.s32 q1, q5, q4 \n" // tmp[4] = a3 + a2
"vsub.s32 q3, q5, q4 \n" // tmp[12] = a3 - a2
// Transpose
// q0 = tmp[0, 4, 8, 12], q1 = tmp[1, 5, 9, 13]
// q2 = tmp[2, 6, 10, 14], q3 = tmp[3, 7, 11, 15]
"vswp d1, d4 \n" // vtrn.64 q0, q2
"vswp d3, d6 \n" // vtrn.64 q1, q3
"vtrn.32 q0, q1 \n"
"vtrn.32 q2, q3 \n"
// part 2 (the '+ 3' is the rounding bias for the final '>> 3')
"vmov.s32 q4, #3 \n" // dc = 3
"vadd.s32 q0, q0, q4 \n" // dc = tmp[0] + 3
"vadd.s32 q6, q0, q3 \n" // a0 = dc + tmp[3]
"vadd.s32 q7, q1, q2 \n" // a1 = tmp[1] + tmp[2]
"vsub.s32 q8, q1, q2 \n" // a2 = tmp[1] - tmp[2]
"vsub.s32 q9, q0, q3 \n" // a3 = dc - tmp[3]
"vadd.s32 q0, q6, q7 \n"
"vshrn.s32 d0, q0, #3 \n" // (a0 + a1) >> 3
"vadd.s32 q1, q9, q8 \n"
"vshrn.s32 d1, q1, #3 \n" // (a3 + a2) >> 3
"vsub.s32 q2, q6, q7 \n"
"vshrn.s32 d2, q2, #3 \n" // (a0 - a1) >> 3
"vsub.s32 q3, q9, q8 \n"
"vshrn.s32 d3, q3, #3 \n" // (a3 - a2) >> 3
// set the results to output
"vst1.16 d0[0], [%[out]], %[kStep] \n"
"vst1.16 d1[0], [%[out]], %[kStep] \n"
"vst1.16 d2[0], [%[out]], %[kStep] \n"
"vst1.16 d3[0], [%[out]], %[kStep] \n"
"vst1.16 d0[1], [%[out]], %[kStep] \n"
"vst1.16 d1[1], [%[out]], %[kStep] \n"
"vst1.16 d2[1], [%[out]], %[kStep] \n"
"vst1.16 d3[1], [%[out]], %[kStep] \n"
"vst1.16 d0[2], [%[out]], %[kStep] \n"
"vst1.16 d1[2], [%[out]], %[kStep] \n"
"vst1.16 d2[2], [%[out]], %[kStep] \n"
"vst1.16 d3[2], [%[out]], %[kStep] \n"
"vst1.16 d0[3], [%[out]], %[kStep] \n"
"vst1.16 d1[3], [%[out]], %[kStep] \n"
"vst1.16 d2[3], [%[out]], %[kStep] \n"
"vst1.16 d3[3], [%[out]], %[kStep] \n"
: [out] "+r"(out) // modified registers
: [in] "r"(in), [kStep] "r"(kStep) // constants
: "memory", "q0", "q1", "q2", "q3", "q4",
"q5", "q6", "q7", "q8", "q9" // clobbered
);
}
// Forward transform.
// adapted from vp8/encoder/arm/neon/shortfdct_neon.asm
// 16-bit multipliers used by FTransform(): loaded as one q register, so the
// low half holds four copies of 5352 and the high half four copies of 2217.
static const int16_t kCoeff16[] = {
5352, 5352, 5352, 5352, 2217, 2217, 2217, 2217
};
// 32-bit additive constants used by FTransform(), four lanes each. They are
// preloaded into the accumulators of the multiply-accumulate instructions:
// 1812 and 937 for the first pass (results shifted right by 9),
// 12000 and 51000 for the second pass (results shifted right by 16).
static const int32_t kCoeff32[] = {
1812, 1812, 1812, 1812,
937, 937, 937, 937,
12000, 12000, 12000, 12000,
51000, 51000, 51000, 51000
};
// Forward transform of the 4x4 difference block (src - ref).
// 'src' and 'ref' point to 4x4 pixel blocks whose rows are BPS bytes apart;
// the 16 resulting coefficients are stored contiguously in out[0..15].
static void FTransform(const uint8_t* src, const uint8_t* ref,
int16_t* out) {
const int kBPS = BPS;
const uint8_t* src_ptr = src;
const uint8_t* ref_ptr = ref;
const int16_t* coeff16 = kCoeff16;
const int32_t* coeff32 = kCoeff32;
__asm__ volatile (
// load src into q4, q5 in high half
// (each vld1.8 reads 8 bytes per row; only the first 4 are kept after the
// vtrn.32 below)
"vld1.8 {d8}, [%[src_ptr]], %[kBPS] \n"
"vld1.8 {d10}, [%[src_ptr]], %[kBPS] \n"
"vld1.8 {d9}, [%[src_ptr]], %[kBPS] \n"
"vld1.8 {d11}, [%[src_ptr]] \n"
// load ref into q6, q7 in high half
"vld1.8 {d12}, [%[ref_ptr]], %[kBPS] \n"
"vld1.8 {d14}, [%[ref_ptr]], %[kBPS] \n"
"vld1.8 {d13}, [%[ref_ptr]], %[kBPS] \n"
"vld1.8 {d15}, [%[ref_ptr]] \n"
// Pack the high values in to q4 and q6
"vtrn.32 q4, q5 \n"
"vtrn.32 q6, q7 \n"
// d[0-3] = src - ref
"vsubl.u8 q0, d8, d12 \n"
"vsubl.u8 q1, d9, d13 \n"
// load coeff16 into q8(d16=5352, d17=2217)
"vld1.16 {q8}, [%[coeff16]] \n"
// load coeff32 high half into q9 = 1812, q10 = 937
"vld1.32 {q9, q10}, [%[coeff32]]! \n"
// load coeff32 low half into q11=12000, q12=51000
"vld1.32 {q11,q12}, [%[coeff32]] \n"
// part 1
// Transpose. Register dN is the same as dN in C
"vtrn.32 d0, d2 \n"
"vtrn.32 d1, d3 \n"
"vtrn.16 d0, d1 \n"
"vtrn.16 d2, d3 \n"
"vadd.s16 d4, d0, d3 \n" // a0 = d0 + d3
"vadd.s16 d5, d1, d2 \n" // a1 = d1 + d2
"vsub.s16 d6, d1, d2 \n" // a2 = d1 - d2
"vsub.s16 d7, d0, d3 \n" // a3 = d0 - d3
"vadd.s16 d0, d4, d5 \n" // a0 + a1
"vshl.s16 d0, d0, #3 \n" // temp[0+i*4] = (a0+a1) << 3
"vsub.s16 d2, d4, d5 \n" // a0 - a1
"vshl.s16 d2, d2, #3 \n" // temp[2+i*4] = (a0-a1) << 3
"vmlal.s16 q9, d7, d16 \n" // a3*5352 + 1812
"vmlal.s16 q10, d7, d17 \n" // a3*2217 + 937
"vmlal.s16 q9, d6, d17 \n" // a2*2217 + a3*5352 + 1812
"vmlsl.s16 q10, d6, d16 \n" // a3*2217 + 937 - a2*5352
// temp[1+i*4] = (a2*2217 + a3*5352 + 1812) >> 9
// temp[3+i*4] = (a3*2217 + 937 - a2*5352) >> 9
"vshrn.s32 d1, q9, #9 \n"
"vshrn.s32 d3, q10, #9 \n"
// part 2
// transpose d0=ip[0], d1=ip[4], d2=ip[8], d3=ip[12]
"vtrn.32 d0, d2 \n"
"vtrn.32 d1, d3 \n"
"vtrn.16 d0, d1 \n"
"vtrn.16 d2, d3 \n"
"vmov.s16 d26, #7 \n"
"vadd.s16 d4, d0, d3 \n" // a1 = ip[0] + ip[12]
"vadd.s16 d5, d1, d2 \n" // b1 = ip[4] + ip[8]
"vsub.s16 d6, d1, d2 \n" // c1 = ip[4] - ip[8]
"vadd.s16 d4, d4, d26 \n" // a1 + 7
"vsub.s16 d7, d0, d3 \n" // d1 = ip[0] - ip[12]
"vadd.s16 d0, d4, d5 \n" // op[0] = a1 + b1 + 7
"vsub.s16 d2, d4, d5 \n" // op[8] = a1 - b1 + 7
"vmlal.s16 q11, d7, d16 \n" // d1*5352 + 12000
"vmlal.s16 q12, d7, d17 \n" // d1*2217 + 51000
// d4 = (d1 == 0) ? 0xffff : 0; inverted below to get a (d1 != 0) mask
"vceq.s16 d4, d7, #0 \n"
"vshr.s16 d0, d0, #4 \n"
"vshr.s16 d2, d2, #4 \n"
"vmlal.s16 q11, d6, d17 \n" // c1*2217 + d1*5352 + 12000
"vmlsl.s16 q12, d6, d16 \n" // d1*2217 - c1*5352 + 51000
"vmvn.s16 d4, d4 \n"
// op[4] = (c1*2217 + d1*5352 + 12000)>>16
"vshrn.s32 d1, q11, #16 \n"
// op[4] += (d1!=0)
// (the mask is -1 where d1 != 0, so subtracting it adds 1)
"vsub.s16 d1, d1, d4 \n"
// op[12]= (d1*2217 - c1*5352 + 51000)>>16
"vshrn.s32 d3, q12, #16 \n"
// set result to out array
"vst1.16 {q0, q1}, [%[out]] \n"
: [src_ptr] "+r"(src_ptr), [ref_ptr] "+r"(ref_ptr),
[coeff32] "+r"(coeff32) // modified registers
: [kBPS] "r"(kBPS), [coeff16] "r"(coeff16),
[out] "r"(out) // constants
: "memory", "q0", "q1", "q2", "q3", "q4", "q5", "q6", "q7", "q8", "q9",
"q10", "q11", "q12", "q13" // clobbered
);
}
// Forward Walsh-Hadamard transform.
// Gathers 16 input values located 16 int16_t apart (in[0], in[16], in[32], ...)
// and stores the 16 transformed values contiguously in out[0..15].
static void FTransformWHT(const int16_t* in, int16_t* out) {
const int kStep = 32; // byte stride between two loads (= 16 int16_t)
__asm__ volatile (
// d0 = in[0 * 16] , d1 = in[1 * 16]
// d2 = in[2 * 16] , d3 = in[3 * 16]
"vld1.16 d0[0], [%[in]], %[kStep] \n"
"vld1.16 d1[0], [%[in]], %[kStep] \n"
"vld1.16 d2[0], [%[in]], %[kStep] \n"
"vld1.16 d3[0], [%[in]], %[kStep] \n"
"vld1.16 d0[1], [%[in]], %[kStep] \n"
"vld1.16 d1[1], [%[in]], %[kStep] \n"
"vld1.16 d2[1], [%[in]], %[kStep] \n"
"vld1.16 d3[1], [%[in]], %[kStep] \n"
"vld1.16 d0[2], [%[in]], %[kStep] \n"
"vld1.16 d1[2], [%[in]], %[kStep] \n"
"vld1.16 d2[2], [%[in]], %[kStep] \n"
"vld1.16 d3[2], [%[in]], %[kStep] \n"
"vld1.16 d0[3], [%[in]], %[kStep] \n"
"vld1.16 d1[3], [%[in]], %[kStep] \n"
"vld1.16 d2[3], [%[in]], %[kStep] \n"
"vld1.16 d3[3], [%[in]], %[kStep] \n"
"vaddl.s16 q2, d0, d2 \n"
"vshl.s32 q2, q2, #2 \n" // a0=(in[0*16]+in[2*16])<<2
"vaddl.s16 q3, d1, d3 \n"
"vshl.s32 q3, q3, #2 \n" // a1=(in[1*16]+in[3*16])<<2
"vsubl.s16 q4, d1, d3 \n"
"vshl.s32 q4, q4, #2 \n" // a2=(in[1*16]-in[3*16])<<2
"vsubl.s16 q5, d0, d2 \n"
"vshl.s32 q5, q5, #2 \n" // a3=(in[0*16]-in[2*16])<<2
// The comparison masks below are all-ones (-1) when true, so subtracting a
// mask adds 1 to the lanes where the condition holds.
"vceq.s32 q10, q2, #0 \n"
"vmvn.s32 q10, q10 \n" // (a0 != 0)
"vqadd.s32 q6, q2, q3 \n" // (a0 + a1)
"vqsub.s32 q6, q6, q10 \n" // (a0 + a1) + (a0 != 0)
"vqadd.s32 q7, q5, q4 \n" // a3 + a2
"vqsub.s32 q8, q5, q4 \n" // a3 - a2
"vqsub.s32 q9, q2, q3 \n" // a0 - a1
// Transpose
// q6 = tmp[0, 1, 2, 3] ; q7 = tmp[ 4, 5, 6, 7]
// q8 = tmp[8, 9, 10, 11] ; q9 = tmp[12, 13, 14, 15]
"vswp d13, d16 \n" // vtrn.64 q6, q8
"vswp d15, d18 \n" // vtrn.64 q7, q9
"vtrn.32 q6, q7 \n"
"vtrn.32 q8, q9 \n"
"vqadd.s32 q0, q6, q8 \n" // a0 = tmp[0] + tmp[8]
"vqadd.s32 q1, q7, q9 \n" // a1 = tmp[4] + tmp[12]
"vqsub.s32 q2, q7, q9 \n" // a2 = tmp[4] - tmp[12]
"vqsub.s32 q3, q6, q8 \n" // a3 = tmp[0] - tmp[8]
"vqadd.s32 q4, q0, q1 \n" // b0 = a0 + a1
"vqadd.s32 q5, q3, q2 \n" // b1 = a3 + a2
"vqsub.s32 q6, q3, q2 \n" // b2 = a3 - a2
"vqsub.s32 q7, q0, q1 \n" // b3 = a0 - a1
"vmov.s32 q0, #3 \n" // q0 = 3
"vcgt.s32 q1, q4, #0 \n" // (b0>0)
"vqsub.s32 q2, q4, q1 \n" // (b0+(b0>0))
"vqadd.s32 q3, q2, q0 \n" // (b0+(b0>0)+3)
"vshrn.s32 d18, q3, #3 \n" // (b0+(b0>0)+3) >> 3
"vcgt.s32 q1, q5, #0 \n" // (b1>0)
"vqsub.s32 q2, q5, q1 \n" // (b1+(b1>0))
"vqadd.s32 q3, q2, q0 \n" // (b1+(b1>0)+3)
"vshrn.s32 d19, q3, #3 \n" // (b1+(b1>0)+3) >> 3
"vcgt.s32 q1, q6, #0 \n" // (b2>0)
"vqsub.s32 q2, q6, q1 \n" // (b2+(b2>0))
"vqadd.s32 q3, q2, q0 \n" // (b2+(b2>0)+3)
"vshrn.s32 d20, q3, #3 \n" // (b2+(b2>0)+3) >> 3
"vcgt.s32 q1, q7, #0 \n" // (b3>0)
"vqsub.s32 q2, q7, q1 \n" // (b3+(b3>0))
"vqadd.s32 q3, q2, q0 \n" // (b3+(b3>0)+3)
"vshrn.s32 d21, q3, #3 \n" // (b3+(b3>0)+3) >> 3
// q9 = out[0..7], q10 = out[8..15]
"vst1.16 {q9, q10}, [%[out]] \n"
: [in] "+r"(in)
: [kStep] "r"(kStep), [out] "r"(out)
: "memory", "q0", "q1", "q2", "q3", "q4", "q5",
"q6", "q7", "q8", "q9", "q10" // clobbered
) ;
}
//------------------------------------------------------------------------------
// Texture distortion
//
// We try to match the spectral content (weighted) between source and
// reconstructed samples.
// Hadamard transform
// For each of the two 4x4 blocks 'a' and 'b' (rows are BPS bytes apart), the
// weighted sum of the absolute values of the transformed coefficients is
// computed, using the 16 weights w[0..15]. The function returns
// abs(sum_a - sum_b) >> 5.
// Both blocks are transformed side by side, entirely in the inline assembly
// below.
static int Disto4x4(const uint8_t* const a, const uint8_t* const b,
const uint16_t* const w) {
const int kBPS = BPS;
// Non-const copies: the asm post-increments the row pointers.
const uint8_t* A = a;
const uint8_t* B = b;
const uint16_t* W = w;
int sum;
__asm__ volatile (
"vld1.32 d0[0], [%[a]], %[kBPS] \n"
"vld1.32 d0[1], [%[a]], %[kBPS] \n"
"vld1.32 d2[0], [%[a]], %[kBPS] \n"
"vld1.32 d2[1], [%[a]] \n"
"vld1.32 d1[0], [%[b]], %[kBPS] \n"
"vld1.32 d1[1], [%[b]], %[kBPS] \n"
"vld1.32 d3[0], [%[b]], %[kBPS] \n"
"vld1.32 d3[1], [%[b]] \n"
// a d0/d2, b d1/d3
// d0/d1: 01 01 01 01
// d2/d3: 23 23 23 23
// But: it goes 01 45 23 67
// Notice the middle values are transposed
"vtrn.16 q0, q1 \n"
// {a0, a1} = {in[0] + in[2], in[1] + in[3]}
"vaddl.u8 q2, d0, d2 \n"
"vaddl.u8 q10, d1, d3 \n"
// {a3, a2} = {in[0] - in[2], in[1] - in[3]}
"vsubl.u8 q3, d0, d2 \n"
"vsubl.u8 q11, d1, d3 \n"
// tmp[0] = a0 + a1
"vpaddl.s16 q0, q2 \n"
"vpaddl.s16 q8, q10 \n"
// tmp[1] = a3 + a2
"vpaddl.s16 q1, q3 \n"
"vpaddl.s16 q9, q11 \n"
// No pair subtract
// q2 = {a0, a3}
// q3 = {a1, a2}
"vtrn.16 q2, q3 \n"
"vtrn.16 q10, q11 \n"
// {tmp[3], tmp[2]} = {a0 - a1, a3 - a2}
"vsubl.s16 q12, d4, d6 \n"
"vsubl.s16 q13, d5, d7 \n"
"vsubl.s16 q14, d20, d22 \n"
"vsubl.s16 q15, d21, d23 \n"
// separate tmp[3] and tmp[2]
// q12 = tmp[3]
// q13 = tmp[2]
"vtrn.32 q12, q13 \n"
"vtrn.32 q14, q15 \n"
// Transpose tmp for a
"vswp d1, d26 \n" // vtrn.64
"vswp d3, d24 \n" // vtrn.64
"vtrn.32 q0, q1 \n"
"vtrn.32 q13, q12 \n"
// Transpose tmp for b
"vswp d17, d30 \n" // vtrn.64
"vswp d19, d28 \n" // vtrn.64
"vtrn.32 q8, q9 \n"
"vtrn.32 q15, q14 \n"
// The first Q register is a, the second b.
// q0/8 tmp[0-3]
// q13/15 tmp[4-7]
// q1/9 tmp[8-11]
// q12/14 tmp[12-15]
// These are still in 01 45 23 67 order. We fix it easily in the addition
// case but the subtraction propagates them.
"vswp d3, d27 \n"
"vswp d19, d31 \n"
// a0 = tmp[0] + tmp[8]
"vadd.s32 q2, q0, q1 \n"
"vadd.s32 q3, q8, q9 \n"
// a1 = tmp[4] + tmp[12]
"vadd.s32 q10, q13, q12 \n"
"vadd.s32 q11, q15, q14 \n"
// a2 = tmp[4] - tmp[12]
"vsub.s32 q13, q13, q12 \n"
"vsub.s32 q15, q15, q14 \n"
// a3 = tmp[0] - tmp[8]
"vsub.s32 q0, q0, q1 \n"
"vsub.s32 q8, q8, q9 \n"
// b0 = a0 + a1
"vadd.s32 q1, q2, q10 \n"
"vadd.s32 q9, q3, q11 \n"
// b1 = a3 + a2
"vadd.s32 q12, q0, q13 \n"
"vadd.s32 q14, q8, q15 \n"
// b2 = a3 - a2
"vsub.s32 q0, q0, q13 \n"
"vsub.s32 q8, q8, q15 \n"
// b3 = a0 - a1
"vsub.s32 q2, q2, q10 \n"
"vsub.s32 q3, q3, q11 \n"
// load the 16 weights: q10 = w[0..7], q11 = w[8..15]
"vld1.64 {q10, q11}, [%[w]] \n"
// abs(b0)
"vabs.s32 q1, q1 \n"
"vabs.s32 q9, q9 \n"
// abs(b1)
"vabs.s32 q12, q12 \n"
"vabs.s32 q14, q14 \n"
// abs(b2)
"vabs.s32 q0, q0 \n"
"vabs.s32 q8, q8 \n"
// abs(b3)
"vabs.s32 q2, q2 \n"
"vabs.s32 q3, q3 \n"
// expand w before using.
"vmovl.u16 q13, d20 \n"
"vmovl.u16 q15, d21 \n"
// w[0] * abs(b0)
"vmul.u32 q1, q1, q13 \n"
"vmul.u32 q9, q9, q13 \n"
// w[4] * abs(b1)
"vmla.u32 q1, q12, q15 \n"
"vmla.u32 q9, q14, q15 \n"
// expand w before using.
"vmovl.u16 q13, d22 \n"
"vmovl.u16 q15, d23 \n"
// w[8] * abs(b2)
"vmla.u32 q1, q0, q13 \n"
"vmla.u32 q9, q8, q13 \n"
// w[12] * abs(b3)
"vmla.u32 q1, q2, q15 \n"
"vmla.u32 q9, q3, q15 \n"
// Sum the arrays
"vpaddl.u32 q1, q1 \n"
"vpaddl.u32 q9, q9 \n"
"vadd.u64 d2, d3 \n"
"vadd.u64 d18, d19 \n"
// Hadamard transform needs 4 bits of extra precision (2 bits in each
// direction) for dynamic range. Weights w[] are 16bits at max, so the maximum
// precision for coeff is 8bit of input + 4bits of Hadamard transform +
// 16bits for w[] + 2 bits of abs() summation.
//
// This uses a maximum of 31 bits (signed). Discarding the top 32 bits is
// A-OK.
// difference of the two sums (d2: block a, d18: block b); the sign does not
// matter since the absolute value is taken next
"vsub.u32 d0, d2, d18 \n"
// abs(sum2 - sum1)
"vabs.s32 d0, d0 \n"
// abs(sum2 - sum1) >> 5
"vshr.u32 d0, #5 \n"
// It would be better to move the value straight into r0 but I'm not
// entirely sure how this works with inline assembly.
"vmov.32 %[sum], d0[0] \n"
: [sum] "=r"(sum), [a] "+r"(A), [b] "+r"(B), [w] "+r"(W)
: [kBPS] "r"(kBPS)
: "memory", "q0", "q1", "q2", "q3", "q4", "q5", "q6", "q7", "q8", "q9",
"q10", "q11", "q12", "q13", "q14", "q15" // clobbered
) ;
return sum;
}
// Texture distortion of a 16x16 block: accumulates Disto4x4() over the sixteen
// 4x4 sub-blocks, visited row by row, always with the same weights 'w'.
static int Disto16x16(const uint8_t* const a, const uint8_t* const b,
                      const uint16_t* const w) {
  int total = 0;
  int row, col;
  for (row = 0; row < 4; ++row) {
    const int row_off = row * 4 * BPS;   // first pixel row of this band
    for (col = 0; col < 4; ++col) {
      const int off = 4 * col + row_off;
      total += Disto4x4(a + off, b + off, w);
    }
  }
  return total;
}
#endif // WEBP_USE_NEON
//------------------------------------------------------------------------------
// Entry point
extern void VP8EncDspInitNEON(void);
// Points the encoder's DSP function pointers at the NEON implementations
// defined in this file. When WEBP_USE_NEON is not defined the function is
// still compiled, but does nothing.
void VP8EncDspInitNEON(void) {
#if defined(WEBP_USE_NEON)
VP8ITransform = ITransform;
VP8FTransform = FTransform;
VP8ITransformWHT = ITransformWHT;
VP8FTransformWHT = FTransformWHT;
VP8TDisto4x4 = Disto4x4;
VP8TDisto16x16 = Disto16x16;
#endif // WEBP_USE_NEON
}
#if defined(__cplusplus) || defined(c_plusplus)
} // extern "C"
#endif

@ -21,17 +21,48 @@ extern "C" {
#include "../enc/vp8enci.h"
//------------------------------------------------------------------------------
// Quite useful helper function for debugging (prints the content of an SSE2
// register). Left here for convenience.
#if 0
#include <stdio.h>
// Debug helper: prints the 128-bit register 'r' to stdout as hexadecimal
// lanes, prefixed by 'name'.
// 'size' selects the lane width in bits: 8, 16 or 32; any other value prints
// the register as two 64-bit lanes.
static void PrintReg(const __m128i r, const char* const name, int size) {
  int n;
  // Reinterpret the register through a union to access its individual lanes.
  union {
    __m128i r;
    uint8_t i8[16];
    uint16_t i16[8];
    uint32_t i32[4];
    uint64_t i64[2];
  } tmp;
  tmp.r = r;
  printf("%s\t: ", name);
  // The explicit casts make each argument match its conversion specifier
  // exactly, whatever the underlying types of the fixed-width typedefs are.
  if (size == 8) {
    for (n = 0; n < 16; ++n) printf("%.2x ", (unsigned int)tmp.i8[n]);
  } else if (size == 16) {
    for (n = 0; n < 8; ++n) printf("%.4x ", (unsigned int)tmp.i16[n]);
  } else if (size == 32) {
    for (n = 0; n < 4; ++n) printf("%.8x ", (unsigned int)tmp.i32[n]);
  } else {
    // uint64_t is not 'unsigned long' on every ABI (e.g. when long is 32 bits),
    // so "%lx" would be a mismatched specifier there.
    for (n = 0; n < 2; ++n) {
      printf("%.16llx ", (unsigned long long)tmp.i64[n]);
    }
  }
  printf("\n");
}
#endif
//------------------------------------------------------------------------------
// Compute susceptibility based on DCT-coeff histograms:
// the higher, the "easier" the macroblock is to compress.
static int CollectHistogramSSE2(const uint8_t* ref, const uint8_t* pred,
int start_block, int end_block) {
int histo[MAX_COEFF_THRESH + 1] = { 0 };
int16_t out[16];
int j, k;
static void CollectHistogramSSE2(const uint8_t* ref, const uint8_t* pred,
int start_block, int end_block,
VP8Histogram* const histo) {
const __m128i max_coeff_thresh = _mm_set1_epi16(MAX_COEFF_THRESH);
int j;
for (j = start_block; j < end_block; ++j) {
int16_t out[16];
int k;
VP8FTransform(ref + VP8DspScan[j], pred + VP8DspScan[j], out);
// Convert coefficients to bin (within out[]).
@ -47,9 +78,9 @@ static int CollectHistogramSSE2(const uint8_t* ref, const uint8_t* pred,
const __m128i xor1 = _mm_xor_si128(out1, sign1);
const __m128i abs0 = _mm_sub_epi16(xor0, sign0);
const __m128i abs1 = _mm_sub_epi16(xor1, sign1);
// v = abs(out) >> 2
const __m128i v0 = _mm_srai_epi16(abs0, 2);
const __m128i v1 = _mm_srai_epi16(abs1, 2);
// v = abs(out) >> 3
const __m128i v0 = _mm_srai_epi16(abs0, 3);
const __m128i v1 = _mm_srai_epi16(abs1, 3);
// bin = min(v, MAX_COEFF_THRESH)
const __m128i bin0 = _mm_min_epi16(v0, max_coeff_thresh);
const __m128i bin1 = _mm_min_epi16(v1, max_coeff_thresh);
@ -58,13 +89,11 @@ static int CollectHistogramSSE2(const uint8_t* ref, const uint8_t* pred,
_mm_storeu_si128((__m128i*)&out[8], bin1);
}
// Use bin to update histogram.
// Convert coefficients to bin.
for (k = 0; k < 16; ++k) {
histo[out[k]]++;
histo->distribution[out[k]]++;
}
}
return VP8GetAlpha(histo);
}
//------------------------------------------------------------------------------
@ -243,7 +272,7 @@ static void ITransformSSE2(const uint8_t* ref, const int16_t* in, uint8_t* dst,
// Add inverse transform to 'ref' and store.
{
const __m128i zero = _mm_set1_epi16(0);
const __m128i zero = _mm_setzero_si128();
// Load the reference(s).
__m128i ref0, ref1, ref2, ref3;
if (do_two) {
@ -295,17 +324,23 @@ static void FTransformSSE2(const uint8_t* src, const uint8_t* ref,
int16_t* out) {
const __m128i zero = _mm_setzero_si128();
const __m128i seven = _mm_set1_epi16(7);
const __m128i k7500 = _mm_set1_epi32(7500);
const __m128i k14500 = _mm_set1_epi32(14500);
const __m128i k937 = _mm_set1_epi32(937);
const __m128i k1812 = _mm_set1_epi32(1812);
const __m128i k51000 = _mm_set1_epi32(51000);
const __m128i k12000_plus_one = _mm_set1_epi32(12000 + (1 << 16));
const __m128i k5352_2217 = _mm_set_epi16(5352, 2217, 5352, 2217,
5352, 2217, 5352, 2217);
const __m128i k2217_5352 = _mm_set_epi16(2217, -5352, 2217, -5352,
2217, -5352, 2217, -5352);
const __m128i k88p = _mm_set_epi16(8, 8, 8, 8, 8, 8, 8, 8);
const __m128i k88m = _mm_set_epi16(-8, 8, -8, 8, -8, 8, -8, 8);
const __m128i k5352_2217p = _mm_set_epi16(2217, 5352, 2217, 5352,
2217, 5352, 2217, 5352);
const __m128i k5352_2217m = _mm_set_epi16(-5352, 2217, -5352, 2217,
-5352, 2217, -5352, 2217);
__m128i v01, v32;
// Difference between src and ref and initial transpose.
{
// Load src and convert to 16b.
@ -326,73 +361,52 @@ static void FTransformSSE2(const uint8_t* src, const uint8_t* ref,
const __m128i ref_1 = _mm_unpacklo_epi8(ref1, zero);
const __m128i ref_2 = _mm_unpacklo_epi8(ref2, zero);
const __m128i ref_3 = _mm_unpacklo_epi8(ref3, zero);
// Compute difference.
// Compute difference. -> 00 01 02 03 00 00 00 00
const __m128i diff0 = _mm_sub_epi16(src_0, ref_0);
const __m128i diff1 = _mm_sub_epi16(src_1, ref_1);
const __m128i diff2 = _mm_sub_epi16(src_2, ref_2);
const __m128i diff3 = _mm_sub_epi16(src_3, ref_3);
// Transpose.
// Unpack and shuffle
// 00 01 02 03 0 0 0 0
// 10 11 12 13 0 0 0 0
// 20 21 22 23 0 0 0 0
// 30 31 32 33 0 0 0 0
const __m128i transpose0_0 = _mm_unpacklo_epi16(diff0, diff1);
const __m128i transpose0_1 = _mm_unpacklo_epi16(diff2, diff3);
// 00 10 01 11 02 12 03 13
// 20 30 21 31 22 32 23 33
const __m128i v23 = _mm_unpackhi_epi32(transpose0_0, transpose0_1);
v01 = _mm_unpacklo_epi32(transpose0_0, transpose0_1);
v32 = _mm_shuffle_epi32(v23, _MM_SHUFFLE(1, 0, 3, 2));
// a02 a12 a22 a32 a03 a13 a23 a33
// a00 a10 a20 a30 a01 a11 a21 a31
// a03 a13 a23 a33 a02 a12 a22 a32
}
// First pass and subsequent transpose.
{
// Same operations are done on the (0,3) and (1,2) pairs.
// b0 = (a0 + a3) << 3
// b1 = (a1 + a2) << 3
// b3 = (a0 - a3) << 3
// b2 = (a1 - a2) << 3
const __m128i a01 = _mm_add_epi16(v01, v32);
const __m128i a32 = _mm_sub_epi16(v01, v32);
const __m128i b01 = _mm_slli_epi16(a01, 3);
const __m128i b32 = _mm_slli_epi16(a32, 3);
const __m128i b11 = _mm_unpackhi_epi64(b01, b01);
const __m128i b22 = _mm_unpackhi_epi64(b32, b32);
// e0 = b0 + b1
// e2 = b0 - b1
const __m128i e0 = _mm_add_epi16(b01, b11);
const __m128i e2 = _mm_sub_epi16(b01, b11);
const __m128i e02 = _mm_unpacklo_epi64(e0, e2);
// e1 = (b3 * 5352 + b2 * 2217 + 14500) >> 12
// e3 = (b3 * 2217 - b2 * 5352 + 7500) >> 12
const __m128i b23 = _mm_unpacklo_epi16(b22, b32);
const __m128i c1 = _mm_madd_epi16(b23, k5352_2217);
const __m128i c3 = _mm_madd_epi16(b23, k2217_5352);
const __m128i d1 = _mm_add_epi32(c1, k14500);
const __m128i d3 = _mm_add_epi32(c3, k7500);
const __m128i e1 = _mm_srai_epi32(d1, 12);
const __m128i e3 = _mm_srai_epi32(d3, 12);
const __m128i e13 = _mm_packs_epi32(e1, e3);
// Transpose.
// 00 01 02 03 20 21 22 23
// 10 11 12 13 30 31 32 33
const __m128i transpose0_0 = _mm_unpacklo_epi16(e02, e13);
const __m128i transpose0_1 = _mm_unpackhi_epi16(e02, e13);
// 00 10 01 11 02 12 03 13
// 20 30 21 31 22 32 23 33
const __m128i v23 = _mm_unpackhi_epi32(transpose0_0, transpose0_1);
v01 = _mm_unpacklo_epi32(transpose0_0, transpose0_1);
v32 = _mm_shuffle_epi32(v23, _MM_SHUFFLE(1, 0, 3, 2));
// 02 12 22 32 03 13 23 33
// 00 10 20 30 01 11 21 31
// 03 13 23 33 02 12 22 32
const __m128i shuf01 = _mm_unpacklo_epi32(diff0, diff1);
const __m128i shuf23 = _mm_unpacklo_epi32(diff2, diff3);
// 00 01 10 11 02 03 12 13
// 20 21 30 31 22 23 32 33
const __m128i shuf01_p =
_mm_shufflehi_epi16(shuf01, _MM_SHUFFLE(2, 3, 0, 1));
const __m128i shuf23_p =
_mm_shufflehi_epi16(shuf23, _MM_SHUFFLE(2, 3, 0, 1));
// 00 01 10 11 03 02 13 12
// 20 21 30 31 23 22 33 32
const __m128i s01 = _mm_unpacklo_epi64(shuf01_p, shuf23_p);
const __m128i s32 = _mm_unpackhi_epi64(shuf01_p, shuf23_p);
// 00 01 10 11 20 21 30 31
// 03 02 13 12 23 22 33 32
const __m128i a01 = _mm_add_epi16(s01, s32);
const __m128i a32 = _mm_sub_epi16(s01, s32);
// [d0 + d3 | d1 + d2 | ...] = [a0 a1 | a0' a1' | ... ]
// [d0 - d3 | d1 - d2 | ...] = [a3 a2 | a3' a2' | ... ]
const __m128i tmp0 = _mm_madd_epi16(a01, k88p); // [ (a0 + a1) << 3, ... ]
const __m128i tmp2 = _mm_madd_epi16(a01, k88m); // [ (a0 - a1) << 3, ... ]
const __m128i tmp1_1 = _mm_madd_epi16(a32, k5352_2217p);
const __m128i tmp3_1 = _mm_madd_epi16(a32, k5352_2217m);
const __m128i tmp1_2 = _mm_add_epi32(tmp1_1, k1812);
const __m128i tmp3_2 = _mm_add_epi32(tmp3_1, k937);
const __m128i tmp1 = _mm_srai_epi32(tmp1_2, 9);
const __m128i tmp3 = _mm_srai_epi32(tmp3_2, 9);
const __m128i s03 = _mm_packs_epi32(tmp0, tmp2);
const __m128i s12 = _mm_packs_epi32(tmp1, tmp3);
const __m128i s_lo = _mm_unpacklo_epi16(s03, s12); // 0 1 0 1 0 1...
const __m128i s_hi = _mm_unpackhi_epi16(s03, s12); // 2 3 2 3 2 3
const __m128i v23 = _mm_unpackhi_epi32(s_lo, s_hi);
v01 = _mm_unpacklo_epi32(s_lo, s_hi);
v32 = _mm_shuffle_epi32(v23, _MM_SHUFFLE(1, 0, 3, 2)); // 3 2 3 2 3 2..
}
// Second pass
@ -406,13 +420,12 @@ static void FTransformSSE2(const uint8_t* src, const uint8_t* ref,
const __m128i a32 = _mm_sub_epi16(v01, v32);
const __m128i a11 = _mm_unpackhi_epi64(a01, a01);
const __m128i a22 = _mm_unpackhi_epi64(a32, a32);
const __m128i a01_plus_7 = _mm_add_epi16(a01, seven);
// d0 = (a0 + a1 + 7) >> 4;
// d2 = (a0 - a1 + 7) >> 4;
const __m128i b0 = _mm_add_epi16(a01, a11);
const __m128i b2 = _mm_sub_epi16(a01, a11);
const __m128i c0 = _mm_add_epi16(b0, seven);
const __m128i c2 = _mm_add_epi16(b2, seven);
const __m128i c0 = _mm_add_epi16(a01_plus_7, a11);
const __m128i c2 = _mm_sub_epi16(a01_plus_7, a11);
const __m128i d0 = _mm_srai_epi16(c0, 4);
const __m128i d2 = _mm_srai_epi16(c2, 4);
@ -430,6 +443,7 @@ static void FTransformSSE2(const uint8_t* src, const uint8_t* ref,
// f1 = f1 + (a3 != 0);
// The compare will return (0xffff, 0) for (==0, !=0). To turn that into the
// desired (0, 1), we add one earlier through k12000_plus_one.
// -> f1 = f1 + 1 - (a3 == 0)
const __m128i g1 = _mm_add_epi16(f1, _mm_cmpeq_epi16(a32, zero));
_mm_storel_epi64((__m128i*)&out[ 0], d0);
@ -442,10 +456,101 @@ static void FTransformSSE2(const uint8_t* src, const uint8_t* ref,
//------------------------------------------------------------------------------
// Metric
// Returns the sum of squared differences between the blocks 'a' and 'b',
// both laid out with a stride of BPS bytes. 4 * num_quads rows are visited.
// When 'do_16' is non-zero all 16 pixels of a row contribute to the sum,
// otherwise only the first 8 do.
// Note: a full 16-byte row is always loaded, even when only 8 pixels are
// used; this relies on the buffers being over-allocated to that effect.
static int SSE_Nx4SSE2(const uint8_t* a, const uint8_t* b,
                       int num_quads, int do_16) {
  const __m128i kZero = _mm_setzero_si128();
  // Two independent accumulators (four 32b partial sums each):
  // one for rows 0-1 of every quad, one for rows 2-3.
  __m128i acc01 = kZero;
  __m128i acc23 = kZero;
  int quad;

  for (quad = 0; quad < num_quads; ++quad) {
    __m128i abs_diff[4];   // |a - b| for the four rows, as 16 x 8b
    __m128i squares[4];    // pairwise-summed squares, as 4 x 32b
    int row;

    for (row = 0; row < 4; ++row) {
      const __m128i pix_a = _mm_loadu_si128((const __m128i*)&a[BPS * row]);
      const __m128i pix_b = _mm_loadu_si128((const __m128i*)&b[BPS * row]);
      // Saturated subtraction yields max(x - y, 0), so one of the two terms
      // is always zero and OR-ing them gives |a - b| in 8b arithmetic.
      const __m128i a_minus_b = _mm_subs_epu8(pix_a, pix_b);
      const __m128i b_minus_a = _mm_subs_epu8(pix_b, pix_a);
      abs_diff[row] = _mm_or_si128(a_minus_b, b_minus_a);
    }

    // Lower 8 pixels: widen to 16b, then multiply with self and add pairs.
    for (row = 0; row < 4; ++row) {
      const __m128i lo = _mm_unpacklo_epi8(abs_diff[row], kZero);
      squares[row] = _mm_madd_epi16(lo, lo);
    }
    acc01 = _mm_add_epi32(acc01, _mm_add_epi32(squares[0], squares[1]));
    acc23 = _mm_add_epi32(acc23, _mm_add_epi32(squares[2], squares[3]));

    if (do_16) {
      // Upper 8 pixels, processed the same way.
      for (row = 0; row < 4; ++row) {
        const __m128i hi = _mm_unpackhi_epi8(abs_diff[row], kZero);
        squares[row] = _mm_madd_epi16(hi, hi);
      }
      acc01 = _mm_add_epi32(acc01, _mm_add_epi32(squares[0], squares[1]));
      acc23 = _mm_add_epi32(acc23, _mm_add_epi32(squares[2], squares[3]));
    }

    a += 4 * BPS;
    b += 4 * BPS;
  }

  {
    // Horizontal reduction of the four 32b lanes.
    int32_t lanes[4];
    _mm_storeu_si128((__m128i*)lanes, _mm_add_epi32(acc01, acc23));
    return (lanes[3] + lanes[2] + lanes[1] + lanes[0]);
  }
}
// Sum of squared differences over 16 rows (4 quads) of 16 pixels each.
static int SSE16x16SSE2(const uint8_t* a, const uint8_t* b) {
return SSE_Nx4SSE2(a, b, 4, 1);
}
// Sum of squared differences over 8 rows (2 quads) of 16 pixels each.
static int SSE16x8SSE2(const uint8_t* a, const uint8_t* b) {
return SSE_Nx4SSE2(a, b, 2, 1);
}
// Sum of squared differences over 8 rows (2 quads), using only the first
// 8 pixels of each row (do_16 == 0). SSE_Nx4SSE2 still loads 16 bytes per row.
static int SSE8x8SSE2(const uint8_t* a, const uint8_t* b) {
return SSE_Nx4SSE2(a, b, 2, 0);
}
static int SSE4x4SSE2(const uint8_t* a, const uint8_t* b) {
const __m128i zero = _mm_set1_epi16(0);
const __m128i zero = _mm_setzero_si128();
// Load values.
// Load values. Note that we read 8 pixels instead of 4,
// but the a/b buffers are over-allocated to that effect.
const __m128i a0 = _mm_loadl_epi64((__m128i*)&a[BPS * 0]);
const __m128i a1 = _mm_loadl_epi64((__m128i*)&a[BPS * 1]);
const __m128i a2 = _mm_loadl_epi64((__m128i*)&a[BPS * 2]);
@ -483,6 +588,7 @@ static int SSE4x4SSE2(const uint8_t* a, const uint8_t* b) {
const __m128i sum0 = _mm_add_epi32(madd0, madd1);
const __m128i sum1 = _mm_add_epi32(madd2, madd3);
const __m128i sum2 = _mm_add_epi32(sum0, sum1);
int32_t tmp[4];
_mm_storeu_si128((__m128i*)tmp, sum2);
return (tmp[3] + tmp[2] + tmp[1] + tmp[0]);
@ -502,8 +608,6 @@ static int TTransformSSE2(const uint8_t* inA, const uint8_t* inB,
int32_t sum[4];
__m128i tmp_0, tmp_1, tmp_2, tmp_3;
const __m128i zero = _mm_setzero_si128();
const __m128i one = _mm_set1_epi16(1);
const __m128i three = _mm_set1_epi16(3);
// Load, combine and transpose inputs.
{
@ -550,17 +654,14 @@ static int TTransformSSE2(const uint8_t* inA, const uint8_t* inB,
// Horizontal pass and subsequent transpose.
{
// Calculate a and b (two 4x4 at once).
const __m128i a0 = _mm_slli_epi16(_mm_add_epi16(tmp_0, tmp_2), 2);
const __m128i a1 = _mm_slli_epi16(_mm_add_epi16(tmp_1, tmp_3), 2);
const __m128i a2 = _mm_slli_epi16(_mm_sub_epi16(tmp_1, tmp_3), 2);
const __m128i a3 = _mm_slli_epi16(_mm_sub_epi16(tmp_0, tmp_2), 2);
// b0_extra = (a0 != 0);
const __m128i b0_extra = _mm_andnot_si128(_mm_cmpeq_epi16 (a0, zero), one);
const __m128i b0_base = _mm_add_epi16(a0, a1);
const __m128i a0 = _mm_add_epi16(tmp_0, tmp_2);
const __m128i a1 = _mm_add_epi16(tmp_1, tmp_3);
const __m128i a2 = _mm_sub_epi16(tmp_1, tmp_3);
const __m128i a3 = _mm_sub_epi16(tmp_0, tmp_2);
const __m128i b0 = _mm_add_epi16(a0, a1);
const __m128i b1 = _mm_add_epi16(a3, a2);
const __m128i b2 = _mm_sub_epi16(a3, a2);
const __m128i b3 = _mm_sub_epi16(a0, a1);
const __m128i b0 = _mm_add_epi16(b0_base, b0_extra);
// a00 a01 a02 a03 b00 b01 b02 b03
// a10 a11 a12 a13 b10 b11 b12 b13
// a20 a21 a22 a23 b20 b21 b22 b23
@ -635,19 +736,6 @@ static int TTransformSSE2(const uint8_t* inA, const uint8_t* inB,
B_b2 = _mm_sub_epi16(B_b2, sign_B_b2);
}
// b = abs(b) + 3
A_b0 = _mm_add_epi16(A_b0, three);
A_b2 = _mm_add_epi16(A_b2, three);
B_b0 = _mm_add_epi16(B_b0, three);
B_b2 = _mm_add_epi16(B_b2, three);
// abs((b + (b<0) + 3) >> 3) = (abs(b) + 3) >> 3
// b = (abs(b) + 3) >> 3
A_b0 = _mm_srai_epi16(A_b0, 3);
A_b2 = _mm_srai_epi16(A_b2, 3);
B_b0 = _mm_srai_epi16(B_b0, 3);
B_b2 = _mm_srai_epi16(B_b2, 3);
// weighted sums
A_b0 = _mm_madd_epi16(A_b0, w_0);
A_b2 = _mm_madd_epi16(A_b2, w_8);
@ -666,7 +754,7 @@ static int TTransformSSE2(const uint8_t* inA, const uint8_t* inB,
static int Disto4x4SSE2(const uint8_t* const a, const uint8_t* const b,
const uint16_t* const w) {
const int diff_sum = TTransformSSE2(a, b, w);
return (abs(diff_sum) + 8) >> 4;
return abs(diff_sum) >> 5;
}
static int Disto16x16SSE2(const uint8_t* const a, const uint8_t* const b,
@ -681,7 +769,6 @@ static int Disto16x16SSE2(const uint8_t* const a, const uint8_t* const b,
return D;
}
//------------------------------------------------------------------------------
// Quantization
//
@ -689,9 +776,8 @@ static int Disto16x16SSE2(const uint8_t* const a, const uint8_t* const b,
// Simple quantization
static int QuantizeBlockSSE2(int16_t in[16], int16_t out[16],
int n, const VP8Matrix* const mtx) {
const __m128i max_coeff_2047 = _mm_set1_epi16(2047);
const __m128i zero = _mm_set1_epi16(0);
__m128i sign0, sign8;
const __m128i max_coeff_2047 = _mm_set1_epi16(MAX_LEVEL);
const __m128i zero = _mm_setzero_si128();
__m128i coeff0, coeff8;
__m128i out0, out8;
__m128i packed_out;
@ -713,8 +799,8 @@ static int QuantizeBlockSSE2(int16_t in[16], int16_t out[16],
const __m128i zthresh8 = _mm_loadu_si128((__m128i*)&mtx->zthresh_[8]);
// sign(in) = in >> 15 (0x0000 if positive, 0xffff if negative)
sign0 = _mm_srai_epi16(in0, 15);
sign8 = _mm_srai_epi16(in8, 15);
const __m128i sign0 = _mm_srai_epi16(in0, 15);
const __m128i sign8 = _mm_srai_epi16(in8, 15);
// coeff = abs(in) = (in ^ sign) - sign
coeff0 = _mm_xor_si128(in0, sign0);
@ -726,10 +812,6 @@ static int QuantizeBlockSSE2(int16_t in[16], int16_t out[16],
coeff0 = _mm_add_epi16(coeff0, sharpen0);
coeff8 = _mm_add_epi16(coeff8, sharpen8);
// if (coeff > 2047) coeff = 2047
coeff0 = _mm_min_epi16(coeff0, max_coeff_2047);
coeff8 = _mm_min_epi16(coeff8, max_coeff_2047);
// out = (coeff * iQ + B) >> QFIX;
{
// doing calculations with 32b precision (QFIX=17)
@ -757,9 +839,14 @@ static int QuantizeBlockSSE2(int16_t in[16], int16_t out[16],
out_04 = _mm_srai_epi32(out_04, QFIX);
out_08 = _mm_srai_epi32(out_08, QFIX);
out_12 = _mm_srai_epi32(out_12, QFIX);
// pack result as 16b
out0 = _mm_packs_epi32(out_00, out_04);
out8 = _mm_packs_epi32(out_08, out_12);
// clamp the packed result: if (out > MAX_LEVEL) out = MAX_LEVEL
out0 = _mm_min_epi16(out0, max_coeff_2047);
out8 = _mm_min_epi16(out8, max_coeff_2047);
}
// get sign back (if (sign[j]) out_n = -out_n)
@ -832,6 +919,9 @@ void VP8EncDspInitSSE2(void) {
VP8EncQuantizeBlock = QuantizeBlockSSE2;
VP8ITransform = ITransformSSE2;
VP8FTransform = FTransformSSE2;
VP8SSE16x16 = SSE16x16SSE2;
VP8SSE16x8 = SSE16x8SSE2;
VP8SSE8x8 = SSE8x8SSE2;
VP8SSE4x4 = SSE4x4SSE2;
VP8TDisto4x4 = Disto4x4SSE2;
VP8TDisto16x16 = Disto16x16SSE2;

@ -11,25 +11,31 @@
// Jyrki Alakuijala (jyrki@google.com)
// Urvang Joshi (urvang@google.com)
#include "./dsp.h"
// Define the following if target arch is sure to have SSE2
// #define WEBP_TARGET_HAS_SSE2
#if defined(__cplusplus) || defined(c_plusplus)
extern "C" {
#endif
#if defined(WEBP_TARGET_HAS_SSE2)
#include <emmintrin.h>
#endif
#include <math.h>
#include <stdlib.h>
#include "./lossless.h"
#include "../dec/vp8li.h"
#include "../dsp/yuv.h"
#include "../dsp/dsp.h"
#include "../enc/histogram.h"
#include "./yuv.h"
#define MAX_DIFF_COST (1e30f)
// lookup table for small values of log2(int)
#define APPROX_LOG_MAX 4096
#define LOG_2_RECIPROCAL 1.44269504088896338700465094007086
#define LOG_LOOKUP_IDX_MAX 256
static const float kLog2Table[LOG_LOOKUP_IDX_MAX] = {
const float kLog2Table[LOG_LOOKUP_IDX_MAX] = {
0.0000000000000000f, 0.0000000000000000f,
1.0000000000000000f, 1.5849625007211560f,
2.0000000000000000f, 2.3219280948873621f,
@ -160,16 +166,97 @@ static const float kLog2Table[LOG_LOOKUP_IDX_MAX] = {
7.9886846867721654f, 7.9943534368588577f
};
float VP8LFastLog2(int v) {
if (v < LOG_LOOKUP_IDX_MAX) {
return kLog2Table[v];
} else if (v < APPROX_LOG_MAX) {
const float kSLog2Table[LOG_LOOKUP_IDX_MAX] = {
0.00000000f, 0.00000000f, 2.00000000f, 4.75488750f,
8.00000000f, 11.60964047f, 15.50977500f, 19.65148445f,
24.00000000f, 28.52932501f, 33.21928095f, 38.05374781f,
43.01955001f, 48.10571634f, 53.30296891f, 58.60335893f,
64.00000000f, 69.48686830f, 75.05865003f, 80.71062276f,
86.43856190f, 92.23866588f, 98.10749561f, 104.04192499f,
110.03910002f, 116.09640474f, 122.21143267f, 128.38196256f,
134.60593782f, 140.88144886f, 147.20671787f, 153.58008562f,
160.00000000f, 166.46500594f, 172.97373660f, 179.52490559f,
186.11730005f, 192.74977453f, 199.42124551f, 206.13068654f,
212.87712380f, 219.65963219f, 226.47733176f, 233.32938445f,
240.21499122f, 247.13338933f, 254.08384998f, 261.06567603f,
268.07820003f, 275.12078236f, 282.19280949f, 289.29369244f,
296.42286534f, 303.57978409f, 310.76392512f, 317.97478424f,
325.21187564f, 332.47473081f, 339.76289772f, 347.07593991f,
354.41343574f, 361.77497759f, 369.16017124f, 376.56863518f,
384.00000000f, 391.45390785f, 398.93001188f, 406.42797576f,
413.94747321f, 421.48818752f, 429.04981119f, 436.63204548f,
444.23460010f, 451.85719280f, 459.49954906f, 467.16140179f,
474.84249102f, 482.54256363f, 490.26137307f, 497.99867911f,
505.75424759f, 513.52785023f, 521.31926438f, 529.12827280f,
536.95466351f, 544.79822957f, 552.65876890f, 560.53608414f,
568.42998244f, 576.34027536f, 584.26677867f, 592.20931226f,
600.16769996f, 608.14176943f, 616.13135206f, 624.13628279f,
632.15640007f, 640.19154569f, 648.24156472f, 656.30630539f,
664.38561898f, 672.47935976f, 680.58738488f, 688.70955430f,
696.84573069f, 704.99577935f, 713.15956818f, 721.33696754f,
729.52785023f, 737.73209140f, 745.94956849f, 754.18016116f,
762.42375127f, 770.68022275f, 778.94946161f, 787.23135586f,
795.52579543f, 803.83267219f, 812.15187982f, 820.48331383f,
828.82687147f, 837.18245171f, 845.54995518f, 853.92928416f,
862.32034249f, 870.72303558f, 879.13727036f, 887.56295522f,
896.00000000f, 904.44831595f, 912.90781569f, 921.37841320f,
929.86002376f, 938.35256392f, 946.85595152f, 955.37010560f,
963.89494641f, 972.43039537f, 980.97637504f, 989.53280911f,
998.09962237f, 1006.67674069f, 1015.26409097f, 1023.86160116f,
1032.46920021f, 1041.08681805f, 1049.71438560f, 1058.35183469f,
1066.99909811f, 1075.65610955f, 1084.32280357f, 1092.99911564f,
1101.68498204f, 1110.38033993f, 1119.08512727f, 1127.79928282f,
1136.52274614f, 1145.25545758f, 1153.99735821f, 1162.74838989f,
1171.50849518f, 1180.27761738f, 1189.05570047f, 1197.84268914f,
1206.63852876f, 1215.44316535f, 1224.25654560f, 1233.07861684f,
1241.90932703f, 1250.74862473f, 1259.59645914f, 1268.45278005f,
1277.31753781f, 1286.19068338f, 1295.07216828f, 1303.96194457f,
1312.85996488f, 1321.76618236f, 1330.68055071f, 1339.60302413f,
1348.53355734f, 1357.47210556f, 1366.41862452f, 1375.37307041f,
1384.33539991f, 1393.30557020f, 1402.28353887f, 1411.26926400f,
1420.26270412f, 1429.26381818f, 1438.27256558f, 1447.28890615f,
1456.31280014f, 1465.34420819f, 1474.38309138f, 1483.42941118f,
1492.48312945f, 1501.54420843f, 1510.61261078f, 1519.68829949f,
1528.77123795f, 1537.86138993f, 1546.95871952f, 1556.06319119f,
1565.17476976f, 1574.29342040f, 1583.41910860f, 1592.55180020f,
1601.69146137f, 1610.83805860f, 1619.99155871f, 1629.15192882f,
1638.31913637f, 1647.49314911f, 1656.67393509f, 1665.86146266f,
1675.05570047f, 1684.25661744f, 1693.46418280f, 1702.67836605f,
1711.89913698f, 1721.12646563f, 1730.36032233f, 1739.60067768f,
1748.84750254f, 1758.10076802f, 1767.36044551f, 1776.62650662f,
1785.89892323f, 1795.17766747f, 1804.46271172f, 1813.75402857f,
1823.05159087f, 1832.35537170f, 1841.66534438f, 1850.98148244f,
1860.30375965f, 1869.63214999f, 1878.96662767f, 1888.30716711f,
1897.65374295f, 1907.00633003f, 1916.36490342f, 1925.72943838f,
1935.09991037f, 1944.47629506f, 1953.85856831f, 1963.24670620f,
1972.64068498f, 1982.04048108f, 1991.44607117f, 2000.85743204f,
2010.27454072f, 2019.69737440f, 2029.12591044f, 2038.56012640f
};
// Slow path for v * log2(v), to be used when 'v' is outside the range of the
// look-up tables (v >= LOG_LOOKUP_IDX_MAX).
// Below APPROX_LOG_MAX, log2(v) is approximated by halving 'v' until it fits
// the kLog2Table[] range and adding back the number of halvings (the bits
// shifted out are ignored). Larger values go through log().
float VP8LFastSLog2Slow(int v) {
  assert(v >= LOG_LOOKUP_IDX_MAX);
  if (v >= APPROX_LOG_MAX) {
    return (float)(LOG_2_RECIPROCAL * v * log((double)v));
  } else {
    const float orig_v = (float)v;   // saved before 'v' gets reduced
    int num_halvings;
    for (num_halvings = 0; v >= LOG_LOOKUP_IDX_MAX; v >>= 1) {
      ++num_halvings;
    }
    return orig_v * (kLog2Table[v] + num_halvings);
  }
}
float VP8LFastLog2Slow(int v) {
assert(v >= LOG_LOOKUP_IDX_MAX);
if (v < APPROX_LOG_MAX) {
int log_cnt = 0;
while (v >= LOG_LOOKUP_IDX_MAX) {
++log_cnt;
v = v >> 1;
}
return kLog2Table[v] + (float)log_cnt;
return kLog2Table[v] + log_cnt;
} else {
return (float)(LOG_2_RECIPROCAL * log((double)v));
}
@ -198,6 +285,61 @@ static WEBP_INLINE uint32_t Average4(uint32_t a0, uint32_t a1,
return Average2(Average2(a0, a1), Average2(a2, a3));
}
#if defined(WEBP_TARGET_HAS_SSE2)
// Computes c0 + c1 - c2 independently on each of the four 8-bit channels
// packed in the 32-bit arguments, clamping every result to [0, 255].
static WEBP_INLINE uint32_t ClampedAddSubtractFull(uint32_t c0, uint32_t c1,
                                                   uint32_t c2) {
  const __m128i kZero = _mm_setzero_si128();
  // Widen the 8b channels of each pixel to 16b lanes, so that the
  // intermediate results can neither overflow nor lose their sign.
  const __m128i wide0 = _mm_unpacklo_epi8(_mm_cvtsi32_si128(c0), kZero);
  const __m128i wide1 = _mm_unpacklo_epi8(_mm_cvtsi32_si128(c1), kZero);
  const __m128i wide2 = _mm_unpacklo_epi8(_mm_cvtsi32_si128(c2), kZero);
  const __m128i combined =
      _mm_sub_epi16(_mm_add_epi16(wide0, wide1), wide2);
  // Packing with unsigned saturation performs the clamping.
  const __m128i packed = _mm_packus_epi16(combined, combined);
  return (uint32_t)_mm_cvtsi128_si32(packed);
}
// For each 8-bit channel: with ave = Average2(c0, c1), returns
// ave + (ave - c2) / 2 clamped to [0, 255], where the division rounds
// toward zero.
static WEBP_INLINE uint32_t ClampedAddSubtractHalf(uint32_t c0, uint32_t c1,
                                                   uint32_t c2) {
  const uint32_t average = Average2(c0, c1);
  const __m128i kZero = _mm_setzero_si128();
  // Widen the 8b channels to 16b lanes.
  const __m128i ave16 = _mm_unpacklo_epi8(_mm_cvtsi32_si128(average), kZero);
  const __m128i c2_16 = _mm_unpacklo_epi8(_mm_cvtsi32_si128(c2), kZero);
  const __m128i delta = _mm_sub_epi16(ave16, c2_16);
  // 0xffff (that is, -1) in the lanes where the difference is negative.
  const __m128i is_negative = _mm_cmpgt_epi16(c2_16, ave16);
  // Subtracting -1 adds one to the negative lanes, which makes the
  // arithmetic shift below round toward zero instead of toward -infinity.
  const __m128i biased = _mm_sub_epi16(delta, is_negative);
  const __m128i half_delta = _mm_srai_epi16(biased, 1);
  const __m128i sum = _mm_add_epi16(ave16, half_delta);
  // Packing with unsigned saturation performs the clamping.
  const __m128i packed = _mm_packus_epi16(sum, sum);
  return (uint32_t)_mm_cvtsi128_si32(packed);
}
// Returns 'a' or 'b' depending on which one is closer to 'c': the
// per-channel absolute differences |b - c| and |a - c| are summed over the
// four 8-bit channels, and 'a' is returned when sum|b - c| <= sum|a - c|.
static WEBP_INLINE uint32_t Select(uint32_t a, uint32_t b, uint32_t c) {
  const __m128i kZero = _mm_setzero_si128();
  const __m128i va = _mm_cvtsi32_si128(a);
  const __m128i vb = _mm_cvtsi32_si128(b);
  const __m128i vc = _mm_cvtsi32_si128(c);
  // |x - c| in 8b arithmetic: one of the two saturated differences is zero.
  const __m128i abs_ac =
      _mm_or_si128(_mm_subs_epu8(va, vc), _mm_subs_epu8(vc, va));
  const __m128i abs_bc =
      _mm_or_si128(_mm_subs_epu8(vb, vc), _mm_subs_epu8(vc, vb));
  // Widen to 16b so that the per-channel difference keeps its sign.
  const __m128i dist_a = _mm_unpacklo_epi8(abs_ac, kZero);   // |a - c|
  const __m128i dist_b = _mm_unpacklo_epi8(abs_bc, kZero);   // |b - c|
  const __m128i delta = _mm_sub_epi16(dist_b, dist_a);
  int16_t lanes[8];
  int total = 0;
  int channel;

  _mm_storeu_si128((__m128i*)lanes, delta);
  // Only the four lower lanes carry channel data.
  for (channel = 0; channel < 4; ++channel) {
    total += lanes[channel];
  }
  return (total <= 0) ? a : b;
}
#else
static WEBP_INLINE uint32_t Clip255(uint32_t a) {
if (a < 256) {
return a;
@ -239,9 +381,9 @@ static WEBP_INLINE uint32_t ClampedAddSubtractHalf(uint32_t c0, uint32_t c1,
}
static WEBP_INLINE int Sub3(int a, int b, int c) {
const int pa = b - c;
const int pb = a - c;
return abs(pa) - abs(pb);
const int pb = b - c;
const int pa = a - c;
return abs(pb) - abs(pa);
}
static WEBP_INLINE uint32_t Select(uint32_t a, uint32_t b, uint32_t c) {
@ -250,9 +392,9 @@ static WEBP_INLINE uint32_t Select(uint32_t a, uint32_t b, uint32_t c) {
Sub3((a >> 16) & 0xff, (b >> 16) & 0xff, (c >> 16) & 0xff) +
Sub3((a >> 8) & 0xff, (b >> 8) & 0xff, (c >> 8) & 0xff) +
Sub3((a ) & 0xff, (b ) & 0xff, (c ) & 0xff);
return (pa_minus_pb <= 0) ? a : b;
}
#endif
//------------------------------------------------------------------------------
// Predictors
@ -340,35 +482,36 @@ static float PredictionCostSpatial(const int* counts,
return (float)(-0.1 * bits);
}
// Compute the Shanon's entropy: Sum(p*log2(p))
static float ShannonEntropy(const int* const array, int n) {
// Compute the combined Shannon entropy for distributions {X} and {X+Y}
static float CombinedShannonEntropy(const int* const X,
const int* const Y, int n) {
int i;
float retval = 0.f;
int sum = 0;
double retval = 0.;
int sumX = 0, sumXY = 0;
for (i = 0; i < n; ++i) {
if (array[i] != 0) {
sum += array[i];
retval -= VP8LFastSLog2(array[i]);
const int x = X[i];
const int xy = X[i] + Y[i];
if (x != 0) {
sumX += x;
retval -= VP8LFastSLog2(x);
}
if (xy != 0) {
sumXY += xy;
retval -= VP8LFastSLog2(xy);
}
}
retval += VP8LFastSLog2(sum);
return retval;
retval += VP8LFastSLog2(sumX) + VP8LFastSLog2(sumXY);
return (float)retval;
}
static float PredictionCostSpatialHistogram(int accumulated[4][256],
int tile[4][256]) {
int i;
int k;
int combo[256];
double retval = 0;
for (i = 0; i < 4; ++i) {
const double exp_val = 0.94;
retval += PredictionCostSpatial(&tile[i][0], 1, exp_val);
retval += ShannonEntropy(&tile[i][0], 256);
for (k = 0; k < 256; ++k) {
combo[k] = accumulated[i][k] + tile[i][k];
}
retval += ShannonEntropy(&combo[0], 256);
const double kExpValue = 0.94;
retval += PredictionCostSpatial(tile[i], 1, kExpValue);
retval += CombinedShannonEntropy(tile[i], accumulated[i], 256);
}
return (float)retval;
}
@ -572,8 +715,21 @@ static void PredictorInverseTransform(const VP8LTransform* const transform,
}
void VP8LSubtractGreenFromBlueAndRed(uint32_t* argb_data, int num_pixs) {
int i;
for (i = 0; i < num_pixs; ++i) {
int i = 0;
#if defined(WEBP_TARGET_HAS_SSE2)
const __m128i mask = _mm_set1_epi32(0x0000ff00);
for (; i + 4 < num_pixs; i += 4) {
const __m128i in = _mm_loadu_si128((__m128i*)&argb_data[i]);
const __m128i in_00g0 = _mm_and_si128(in, mask); // 00g0|00g0|...
const __m128i in_0g00 = _mm_slli_epi32(in_00g0, 8); // 0g00|0g00|...
const __m128i in_000g = _mm_srli_epi32(in_00g0, 8); // 000g|000g|...
const __m128i in_0g0g = _mm_or_si128(in_0g00, in_000g);
const __m128i out = _mm_sub_epi8(in, in_0g0g);
_mm_storeu_si128((__m128i*)&argb_data[i], out);
}
// fallthrough and finish off with plain-C
#endif
for (; i < num_pixs; ++i) {
const uint32_t argb = argb_data[i];
const uint32_t green = (argb >> 8) & 0xff;
const uint32_t new_r = (((argb >> 16) & 0xff) - green) & 0xff;
@ -588,9 +744,21 @@ static void AddGreenToBlueAndRed(const VP8LTransform* const transform,
int y_start, int y_end, uint32_t* data) {
const int width = transform->xsize_;
const uint32_t* const data_end = data + (y_end - y_start) * width;
#if defined(WEBP_TARGET_HAS_SSE2)
const __m128i mask = _mm_set1_epi32(0x0000ff00);
for (; data + 4 < data_end; data += 4) {
const __m128i in = _mm_loadu_si128((__m128i*)data);
const __m128i in_00g0 = _mm_and_si128(in, mask); // 00g0|00g0|...
const __m128i in_0g00 = _mm_slli_epi32(in_00g0, 8); // 0g00|0g00|...
const __m128i in_000g = _mm_srli_epi32(in_00g0, 8); // 000g|000g|...
const __m128i in_0g0g = _mm_or_si128(in_0g00, in_000g);
const __m128i out = _mm_add_epi8(in, in_0g0g);
_mm_storeu_si128((__m128i*)data, out);
}
// fallthrough and finish off with plain-C
#endif
while (data < data_end) {
const uint32_t argb = *data;
// "* 0001001u" is equivalent to "(green << 16) + green)"
const uint32_t green = ((argb >> 8) & 0xff);
uint32_t red_blue = (argb & 0x00ff00ffu);
red_blue += (green << 16) | green;
@ -655,6 +823,25 @@ static WEBP_INLINE uint32_t TransformColor(const Multipliers* const m,
return (argb & 0xff00ff00u) | (new_red << 16) | (new_blue);
}
// Returns the low 8 bits of the red channel of 'argb' (bits 16..23) after
// subtracting ColorTransformDelta(green_to_red, green) from it. Only the
// red value is produced; 'argb' itself is not modified.
static WEBP_INLINE uint8_t TransformColorRed(uint8_t green_to_red,
                                             uint32_t argb) {
  const uint32_t delta = ColorTransformDelta(green_to_red, argb >> 8);
  return (uint8_t)(((argb >> 16) - delta) & 0xff);
}
// Returns the low 8 bits of the blue channel of 'argb' (bits 0..7) after
// subtracting both ColorTransformDelta(green_to_blue, green) and
// ColorTransformDelta(red_to_blue, red) from it. Only the blue value is
// produced; 'argb' itself is not modified.
static WEBP_INLINE uint8_t TransformColorBlue(uint8_t green_to_blue,
                                              uint8_t red_to_blue,
                                              uint32_t argb) {
  // The subtractions are carried out modulo 2^32 and only the low byte is
  // kept, which matches byte-wise wrap-around arithmetic.
  uint32_t blue = argb;
  blue -= ColorTransformDelta(green_to_blue, argb >> 8);
  blue -= ColorTransformDelta(red_to_blue, argb >> 16);
  return (uint8_t)(blue & 0xff);
}
static WEBP_INLINE int SkipRepeatedPixels(const uint32_t* const argb,
int ix, int xsize) {
const uint32_t v = argb[ix];
@ -675,14 +862,10 @@ static WEBP_INLINE int SkipRepeatedPixels(const uint32_t* const argb,
static float PredictionCostCrossColor(const int accumulated[256],
const int counts[256]) {
// Favor low entropy, locally and globally.
int i;
int combo[256];
for (i = 0; i < 256; ++i) {
combo[i] = accumulated[i] + counts[i];
}
return ShannonEntropy(combo, 256) +
ShannonEntropy(counts, 256) +
PredictionCostSpatial(counts, 3, 2.4); // Favor small absolute values.
// Favor small absolute values for PredictionCostSpatial
static const double kExpValue = 2.4;
return CombinedShannonEntropy(counts, accumulated, 256) +
PredictionCostSpatial(counts, 3, kExpValue);
}
static Multipliers GetBestColorTransformForTile(
@ -712,85 +895,75 @@ static Multipliers GetBestColorTransformForTile(
if (all_y_max > ysize) {
all_y_max = ysize;
}
for (green_to_red = -64; green_to_red <= 64; green_to_red += halfstep) {
int histo[256] = { 0 };
int all_y;
Multipliers tx;
MultipliersClear(&tx);
tx.green_to_red_ = green_to_red & 0xff;
for (all_y = tile_y_offset; all_y < all_y_max; ++all_y) {
uint32_t predict;
int ix = all_y * xsize + tile_x_offset;
int all_x;
for (all_x = tile_x_offset; all_x < all_x_max; ++all_x, ++ix) {
if (SkipRepeatedPixels(argb, ix, xsize)) {
continue;
}
predict = TransformColor(&tx, argb[ix], 0);
++histo[(predict >> 16) & 0xff]; // red.
++histo[TransformColorRed(green_to_red, argb[ix])]; // red.
}
}
cur_diff = PredictionCostCrossColor(&accumulated_red_histo[0], &histo[0]);
if (tx.green_to_red_ == prevX.green_to_red_) {
if ((uint8_t)green_to_red == prevX.green_to_red_) {
cur_diff -= 3; // favor keeping the areas locally similar
}
if (tx.green_to_red_ == prevY.green_to_red_) {
if ((uint8_t)green_to_red == prevY.green_to_red_) {
cur_diff -= 3; // favor keeping the areas locally similar
}
if (tx.green_to_red_ == 0) {
if (green_to_red == 0) {
cur_diff -= 3;
}
if (cur_diff < best_diff) {
best_diff = cur_diff;
best_tx = tx;
best_tx.green_to_red_ = green_to_red;
}
}
best_diff = MAX_DIFF_COST;
green_to_red = best_tx.green_to_red_;
for (green_to_blue = -32; green_to_blue <= 32; green_to_blue += step) {
for (red_to_blue = -32; red_to_blue <= 32; red_to_blue += step) {
int all_y;
int histo[256] = { 0 };
Multipliers tx;
tx.green_to_red_ = green_to_red;
tx.green_to_blue_ = green_to_blue;
tx.red_to_blue_ = red_to_blue;
for (all_y = tile_y_offset; all_y < all_y_max; ++all_y) {
uint32_t predict;
int all_x;
int ix = all_y * xsize + tile_x_offset;
for (all_x = tile_x_offset; all_x < all_x_max; ++all_x, ++ix) {
if (SkipRepeatedPixels(argb, ix, xsize)) {
continue;
}
predict = TransformColor(&tx, argb[ix], 0);
++histo[predict & 0xff]; // blue.
++histo[TransformColorBlue(green_to_blue, red_to_blue, argb[ix])];
}
}
cur_diff =
PredictionCostCrossColor(&accumulated_blue_histo[0], &histo[0]);
if (tx.green_to_blue_ == prevX.green_to_blue_) {
PredictionCostCrossColor(&accumulated_blue_histo[0], &histo[0]);
if ((uint8_t)green_to_blue == prevX.green_to_blue_) {
cur_diff -= 3; // favor keeping the areas locally similar
}
if (tx.green_to_blue_ == prevY.green_to_blue_) {
if ((uint8_t)green_to_blue == prevY.green_to_blue_) {
cur_diff -= 3; // favor keeping the areas locally similar
}
if (tx.red_to_blue_ == prevX.red_to_blue_) {
if ((uint8_t)red_to_blue == prevX.red_to_blue_) {
cur_diff -= 3; // favor keeping the areas locally similar
}
if (tx.red_to_blue_ == prevY.red_to_blue_) {
if ((uint8_t)red_to_blue == prevY.red_to_blue_) {
cur_diff -= 3; // favor keeping the areas locally similar
}
if (tx.green_to_blue_ == 0) {
if (green_to_blue == 0) {
cur_diff -= 3;
}
if (tx.red_to_blue_ == 0) {
if (red_to_blue == 0) {
cur_diff -= 3;
}
if (cur_diff < best_diff) {
best_diff = cur_diff;
best_tx = tx;
best_tx.green_to_blue_ = green_to_blue;
best_tx.red_to_blue_ = red_to_blue;
}
}
}
@ -1034,8 +1207,15 @@ static void ConvertBGRAToRGBA4444(const uint32_t* src,
const uint32_t* const src_end = src + num_pixels;
while (src < src_end) {
const uint32_t argb = *src++;
*dst++ = ((argb >> 16) & 0xf0) | ((argb >> 12) & 0xf);
*dst++ = ((argb >> 0) & 0xf0) | ((argb >> 28) & 0xf);
const uint8_t rg = ((argb >> 16) & 0xf0) | ((argb >> 12) & 0xf);
const uint8_t ba = ((argb >> 0) & 0xf0) | ((argb >> 28) & 0xf);
#ifdef WEBP_SWAP_16BIT_CSP
*dst++ = ba;
*dst++ = rg;
#else
*dst++ = rg;
*dst++ = ba;
#endif
}
}
@ -1044,8 +1224,15 @@ static void ConvertBGRAToRGB565(const uint32_t* src,
const uint32_t* const src_end = src + num_pixels;
while (src < src_end) {
const uint32_t argb = *src++;
*dst++ = ((argb >> 16) & 0xf8) | ((argb >> 13) & 0x7);
*dst++ = ((argb >> 5) & 0xe0) | ((argb >> 3) & 0x1f);
const uint8_t rg = ((argb >> 16) & 0xf8) | ((argb >> 13) & 0x7);
const uint8_t gb = ((argb >> 5) & 0xe0) | ((argb >> 3) & 0x1f);
#ifdef WEBP_SWAP_16BIT_CSP
*dst++ = gb;
*dst++ = rg;
#else
*dst++ = rg;
*dst++ = gb;
#endif
}
}
@ -1066,20 +1253,27 @@ static void CopyOrSwap(const uint32_t* src, int num_pixels, uint8_t* dst,
const uint32_t* const src_end = src + num_pixels;
while (src < src_end) {
uint32_t argb = *src++;
#if !defined(WEBP_REFERENCE_IMPLEMENTATION)
#if !defined(__BIG_ENDIAN__) && (defined(__i386__) || defined(__x86_64__))
__asm__ volatile("bswap %0" : "=r"(argb) : "0"(argb));
*(uint32_t*)dst = argb;
dst += sizeof(argb);
#elif !defined(__BIG_ENDIAN__) && defined(_MSC_VER)
argb = _byteswap_ulong(argb);
*(uint32_t*)dst = argb;
dst += sizeof(argb);
#else
*dst++ = (argb >> 24) & 0xff;
*dst++ = (argb >> 16) & 0xff;
*dst++ = (argb >> 8) & 0xff;
*dst++ = (argb >> 0) & 0xff;
dst[0] = (argb >> 24) & 0xff;
dst[1] = (argb >> 16) & 0xff;
dst[2] = (argb >> 8) & 0xff;
dst[3] = (argb >> 0) & 0xff;
#endif
#else // WEBP_REFERENCE_IMPLEMENTATION
dst[0] = (argb >> 24) & 0xff;
dst[1] = (argb >> 16) & 0xff;
dst[2] = (argb >> 8) & 0xff;
dst[3] = (argb >> 0) & 0xff;
#endif
dst += sizeof(argb);
}
} else {
memcpy(dst, src, num_pixels * sizeof(*src));

@ -59,10 +59,20 @@ static WEBP_INLINE uint32_t VP8LSubSampleSize(uint32_t size,
return (size + (1 << sampling_bits) - 1) >> sampling_bits;
}
// Faster logarithm for integers, with the property of log2(0) == 0.
float VP8LFastLog2(int v);
// Faster logarithm for integers. Small values use a look-up table.
#define LOG_LOOKUP_IDX_MAX 256
extern const float kLog2Table[LOG_LOOKUP_IDX_MAX];
extern const float kSLog2Table[LOG_LOOKUP_IDX_MAX];
extern float VP8LFastLog2Slow(int v);
extern float VP8LFastSLog2Slow(int v);
// Fast log2(v) for integer input: values below LOG_LOOKUP_IDX_MAX are read
// from kLog2Table[] (whose entry for 0 is 0), larger ones are delegated to
// VP8LFastLog2Slow().
static WEBP_INLINE float VP8LFastLog2(int v) {
  if (v >= LOG_LOOKUP_IDX_MAX) {
    return VP8LFastLog2Slow(v);
  }
  return kLog2Table[v];
}
// Fast calculation of v * log2(v) for integer input.
static WEBP_INLINE float VP8LFastSLog2(int v) { return VP8LFastLog2(v) * v; }
static WEBP_INLINE float VP8LFastSLog2(int v) {
return (v < LOG_LOOKUP_IDX_MAX) ? kSLog2Table[v] : VP8LFastSLog2Slow(v);
}
// In-place difference of each component with mod 256.
static WEBP_INLINE uint32_t VP8LSubPixels(uint32_t a, uint32_t b) {

@ -32,7 +32,7 @@ WebPUpsampleLinePairFunc WebPUpsamplers[MODE_LAST];
// ([3*a + b + 9*c + 3*d a + 3*b + 3*c + 9*d] [8 8]) / 16
// We process u and v together stashed into 32bit (16bit each).
#define LOAD_UV(u,v) ((u) | ((v) << 16))
#define LOAD_UV(u, v) ((u) | ((v) << 16))
#define UPSAMPLE_FUNC(FUNC_NAME, FUNC, XSTEP) \
static void FUNC_NAME(const uint8_t* top_y, const uint8_t* bottom_y, \
@ -327,6 +327,11 @@ void WebPInitUpsamplers(void) {
if (VP8GetCPUInfo(kSSE2)) {
WebPInitUpsamplersSSE2();
}
#endif
#if defined(WEBP_USE_NEON)
if (VP8GetCPUInfo(kNEON)) {
WebPInitUpsamplersNEON();
}
#endif
}
#endif // FANCY_UPSAMPLING
@ -347,6 +352,11 @@ void WebPInitPremultiply(void) {
if (VP8GetCPUInfo(kSSE2)) {
WebPInitPremultiplySSE2();
}
#endif
#if defined(WEBP_USE_NEON)
if (VP8GetCPUInfo(kNEON)) {
WebPInitPremultiplyNEON();
}
#endif
}
#endif // FANCY_UPSAMPLING

@ -0,0 +1,292 @@
// Copyright 2011 Google Inc. All Rights Reserved.
//
// This code is licensed under the same terms as WebM:
// Software License Agreement: http://www.webmproject.org/license/software/
// Additional IP Rights Grant: http://www.webmproject.org/license/additional/
// -----------------------------------------------------------------------------
//
// NEON version of YUV to RGB upsampling functions.
//
// Author: mans@mansr.com (Mans Rullgard)
// Based on SSE code by: somnath@google.com (Somnath Banerjee)
#include "./dsp.h"
#if defined(__cplusplus) || defined(c_plusplus)
extern "C" {
#endif
#if defined(WEBP_USE_NEON)
#include <assert.h>
#include <arm_neon.h>
#include <string.h>
#include "./yuv.h"
#ifdef FANCY_UPSAMPLING
// Loads 9 pixels each from rows r1 and r2 and generates 16 pixels.
#define UPSAMPLE_16PIXELS(r1, r2, out) { \
uint8x8_t a = vld1_u8(r1); \
uint8x8_t b = vld1_u8(r1 + 1); \
uint8x8_t c = vld1_u8(r2); \
uint8x8_t d = vld1_u8(r2 + 1); \
\
uint16x8_t al = vshll_n_u8(a, 1); \
uint16x8_t bl = vshll_n_u8(b, 1); \
uint16x8_t cl = vshll_n_u8(c, 1); \
uint16x8_t dl = vshll_n_u8(d, 1); \
\
uint8x8_t diag1, diag2; \
uint16x8_t sl; \
\
/* a + b + c + d */ \
sl = vaddl_u8(a, b); \
sl = vaddw_u8(sl, c); \
sl = vaddw_u8(sl, d); \
\
al = vaddq_u16(sl, al); /* 3a + b + c + d */ \
bl = vaddq_u16(sl, bl); /* a + 3b + c + d */ \
\
al = vaddq_u16(al, dl); /* 3a + b + c + 3d */ \
bl = vaddq_u16(bl, cl); /* a + 3b + 3c + d */ \
\
diag2 = vshrn_n_u16(al, 3); \
diag1 = vshrn_n_u16(bl, 3); \
\
a = vrhadd_u8(a, diag1); \
b = vrhadd_u8(b, diag2); \
c = vrhadd_u8(c, diag2); \
d = vrhadd_u8(d, diag1); \
\
{ \
const uint8x8x2_t a_b = {{ a, b }}; \
const uint8x8x2_t c_d = {{ c, d }}; \
vst2_u8(out, a_b); \
vst2_u8(out + 32, c_d); \
} \
}
// Turn the macro into a function for reducing code-size when non-critical.
// Out-of-line wrapper around UPSAMPLE_16PIXELS: 'r1' and 'r2' must each
// provide 9 readable bytes; the results are written to out[0..15] (from r1)
// and out[32..47] (from r2).
static void Upsample16Pixels(const uint8_t *r1, const uint8_t *r2,
uint8_t *out) {
UPSAMPLE_16PIXELS(r1, r2, out);
}
// Upsamples a trailing block for which fewer than 9 source samples may be
// available: 'num_pixels' samples of the rows 'tb' and 'bb' are copied into
// 9-byte scratch rows, the remainder of each scratch row is filled with its
// last copied sample, and the result goes through Upsample16Pixels().
// 'num_pixels' must be in the [1, 9] range: the scratch rows hold 9 bytes and
// the sample at index num_pixels - 1 is read for the padding.
#define UPSAMPLE_LAST_BLOCK(tb, bb, num_pixels, out) { \
uint8_t r1[9], r2[9]; \
memcpy(r1, (tb), (num_pixels)); \
memcpy(r2, (bb), (num_pixels)); \
/* replicate last byte */ \
memset(r1 + (num_pixels), r1[(num_pixels) - 1], 9 - (num_pixels)); \
memset(r2 + (num_pixels), r2[(num_pixels) - 1], 9 - (num_pixels)); \
Upsample16Pixels(r1, r2, out); \
}
// Integer conversion coefficients used by the YUV -> RGB code below.
// NOTE(review): presumably fixed-point values with 16 fractional bits, in
// line with the 16-bit right-shifts applied in CONVERT8 -- confirm.
#define CY 76283
#define CVR 89858
#define CUG 22014
#define CVG 45773
#define CUB 113618
// Chroma coefficients as 16b lanes. CVR, CVG and CUB exceed the int16_t range
// and are therefore stored divided by 4, 2 and 4 respectively
// (22464, 22014, 22886, 28404); the code using this table has to compensate
// for the dropped factors.
static const int16_t coef[4] = { CVR / 4, CUG, CVG / 2, CUB / 4 };
// NEON conversion of N pixels from YUV to FMT, 8 pixels per loop iteration
// (the loop index advances by 8, so N is expected to be a multiple of 8).
// For each group:
//  - 8 luma samples are read at src_y + cur_x + i, 8 u samples at src_uv + i
//    and 8 v samples at src_uv + i + 16;
//  - 16 is subtracted from y and 128 from u and v;
//  - the chroma contributions to R, G and B are formed with the lanes of
//    'cf16' (see coef[]): the doubling multiply-accumulate (vqdmlal), the
//    pre-doubled operands (ud, vd) and the initial accumulator values make up
//    for the /4 and /2 scaling applied when coef[] was built;
//  - luma plus chroma is multiplied by lane 0 of 'cf32', narrowed with a
//    rounding 16-bit shift and saturated to unsigned 8 bits;
//  - the three channels are written by STR_<FMT> at out + (cur_x + i) * XSTEP.
// The expanding scope must provide 'cf16', 'cf32', 'u16' and 'u128'.
#define CONVERT8(FMT, XSTEP, N, src_y, src_uv, out, cur_x) { \
int i; \
for (i = 0; i < N; i += 8) { \
int off = ((cur_x) + i) * XSTEP; \
uint8x8_t y = vld1_u8(src_y + (cur_x) + i); \
uint8x8_t u = vld1_u8((src_uv) + i); \
uint8x8_t v = vld1_u8((src_uv) + i + 16); \
int16x8_t yy = vreinterpretq_s16_u16(vsubl_u8(y, u16)); \
int16x8_t uu = vreinterpretq_s16_u16(vsubl_u8(u, u128)); \
int16x8_t vv = vreinterpretq_s16_u16(vsubl_u8(v, u128)); \
\
int16x8_t ud = vshlq_n_s16(uu, 1); \
int16x8_t vd = vshlq_n_s16(vv, 1); \
\
int32x4_t vrl = vqdmlal_lane_s16(vshll_n_s16(vget_low_s16(vv), 1), \
vget_low_s16(vd), cf16, 0); \
int32x4_t vrh = vqdmlal_lane_s16(vshll_n_s16(vget_high_s16(vv), 1), \
vget_high_s16(vd), cf16, 0); \
int16x8_t vr = vcombine_s16(vrshrn_n_s32(vrl, 16), \
vrshrn_n_s32(vrh, 16)); \
\
int32x4_t vl = vmovl_s16(vget_low_s16(vv)); \
int32x4_t vh = vmovl_s16(vget_high_s16(vv)); \
int32x4_t ugl = vmlal_lane_s16(vl, vget_low_s16(uu), cf16, 1); \
int32x4_t ugh = vmlal_lane_s16(vh, vget_high_s16(uu), cf16, 1); \
int32x4_t gcl = vqdmlal_lane_s16(ugl, vget_low_s16(vv), cf16, 2); \
int32x4_t gch = vqdmlal_lane_s16(ugh, vget_high_s16(vv), cf16, 2); \
int16x8_t gc = vcombine_s16(vrshrn_n_s32(gcl, 16), \
vrshrn_n_s32(gch, 16)); \
\
int32x4_t ubl = vqdmlal_lane_s16(vshll_n_s16(vget_low_s16(uu), 1), \
vget_low_s16(ud), cf16, 3); \
int32x4_t ubh = vqdmlal_lane_s16(vshll_n_s16(vget_high_s16(uu), 1), \
vget_high_s16(ud), cf16, 3); \
int16x8_t ub = vcombine_s16(vrshrn_n_s32(ubl, 16), \
vrshrn_n_s32(ubh, 16)); \
\
int32x4_t rl = vaddl_s16(vget_low_s16(yy), vget_low_s16(vr)); \
int32x4_t rh = vaddl_s16(vget_high_s16(yy), vget_high_s16(vr)); \
int32x4_t gl = vsubl_s16(vget_low_s16(yy), vget_low_s16(gc)); \
int32x4_t gh = vsubl_s16(vget_high_s16(yy), vget_high_s16(gc)); \
int32x4_t bl = vaddl_s16(vget_low_s16(yy), vget_low_s16(ub)); \
int32x4_t bh = vaddl_s16(vget_high_s16(yy), vget_high_s16(ub)); \
\
rl = vmulq_lane_s32(rl, cf32, 0); \
rh = vmulq_lane_s32(rh, cf32, 0); \
gl = vmulq_lane_s32(gl, cf32, 0); \
gh = vmulq_lane_s32(gh, cf32, 0); \
bl = vmulq_lane_s32(bl, cf32, 0); \
bh = vmulq_lane_s32(bh, cf32, 0); \
\
y = vqmovun_s16(vcombine_s16(vrshrn_n_s32(rl, 16), \
vrshrn_n_s32(rh, 16))); \
u = vqmovun_s16(vcombine_s16(vrshrn_n_s32(gl, 16), \
vrshrn_n_s32(gh, 16))); \
v = vqmovun_s16(vcombine_s16(vrshrn_n_s32(bl, 16), \
vrshrn_n_s32(bh, 16))); \
STR_ ## FMT(out + off, y, u, v); \
} \
}
// Vector of eight 255 bytes: the opaque alpha channel for 4-channel stores.
#define v255 vmov_n_u8(255)

// Interleaving stores for 8 pixels. Each STR_<FMT> takes the r, g and b
// vectors and writes them at 'out' in the channel order named by FMT; the
// 4-channel variants append v255 as the last channel.
#define STR_Rgb(out, r, g, b) do {                                             \
  const uint8x8x3_t px3 = {{ (r), (g), (b) }};                                 \
  vst3_u8((out), px3);                                                         \
} while (0)

#define STR_Bgr(out, r, g, b) do {                                             \
  const uint8x8x3_t px3 = {{ (b), (g), (r) }};                                 \
  vst3_u8((out), px3);                                                         \
} while (0)

#define STR_Rgba(out, r, g, b) do {                                            \
  const uint8x8x4_t px4 = {{ (r), (g), (b), v255 }};                           \
  vst4_u8((out), px4);                                                         \
} while (0)

#define STR_Bgra(out, r, g, b) do {                                            \
  const uint8x8x4_t px4 = {{ (b), (g), (r), v255 }};                           \
  vst4_u8((out), px4);                                                         \
} while (0)
// Scalar counterpart of CONVERT8: converts N pixels one at a time. Pixel k
// takes its luma from src_y[cur_x + k], its u from src_uv[k] and its v from
// src_uv[k + 16], and is written by VP8YuvTo<FMT> at
// rgb + (cur_x + k) * XSTEP.
#define CONVERT1(FMT, XSTEP, N, src_y, src_uv, rgb, cur_x) {                   \
  int px = 0;                                                                  \
  while (px < (N)) {                                                           \
    const int px_pos = (cur_x) + px;                                           \
    const int px_off = px_pos * (XSTEP);                                       \
    const int yy = (src_y)[px_pos];                                            \
    const int uu = (src_uv)[px];                                               \
    const int vv = (src_uv)[px + 16];                                          \
    VP8YuvTo ## FMT(yy, uu, vv, (rgb) + px_off);                               \
    ++px;                                                                      \
  }                                                                            \
}
// Runs CONVERT8 on the top and/or bottom row. A row is skipped when its luma
// pointer is NULL. The top row reads its chroma at 'uv', the bottom row at
// 'uv' + 32.
#define CONVERT2RGB_8(FMT, XSTEP, top_y, bottom_y, uv,                         \
                      top_dst, bottom_dst, cur_x, len) {                       \
  if ((top_y) != NULL) {                                                       \
    CONVERT8(FMT, XSTEP, len, top_y, uv, top_dst, cur_x)                       \
  }                                                                            \
  if ((bottom_y) != NULL) {                                                    \
    CONVERT8(FMT, XSTEP, len, bottom_y, (uv) + 32, bottom_dst, cur_x)          \
  }                                                                            \
}
// Runs CONVERT1 on the top and/or bottom row. A row is skipped when its luma
// pointer is NULL. The top row reads its chroma at 'uv', the bottom row at
// 'uv' + 32.
#define CONVERT2RGB_1(FMT, XSTEP, top_y, bottom_y, uv,                         \
                      top_dst, bottom_dst, cur_x, len) {                       \
  if ((top_y) != NULL) {                                                       \
    CONVERT1(FMT, XSTEP, len, top_y, uv, top_dst, cur_x);                      \
  }                                                                            \
  if ((bottom_y) != NULL) {                                                    \
    CONVERT1(FMT, XSTEP, len, bottom_y, (uv) + 32, bottom_dst, cur_x);         \
  }                                                                            \
}
// Generates FUNC_NAME, a converter that upsamples chroma and writes FMT pixels
// (XSTEP bytes each) for a pair of luma rows.
//  - top_y / bottom_y: luma rows; every use is guarded by 'if (top_y)' /
//    'if (bottom_y)', so a NULL pointer skips that row's output.
//  - top_u, top_v / cur_u, cur_v: the two chroma rows that get blended.
//  - top_dst / bottom_dst: output rows.
//  - len: number of luma pixels per row.
// Processing order: pixel 0 is converted with scalar code; then 'num_blocks'
// groups of 16 pixels go through UPSAMPLE_16PIXELS + CONVERT8 (consuming 8
// chroma samples each); the remaining pixels go through UPSAMPLE_LAST_BLOCK +
// CONVERT1.
// Scratch layout in r_uv (64 bytes): [0..15] u and [16..31] v for the top
// output row, [32..47] u and [48..63] v for the bottom output row.
// NOTE(review): top_u[0], cur_u[0], top_v[0] and cur_v[0] are read
// unconditionally, so the chroma pointers presumably must be valid and len
// at least 1 -- confirm against callers.
#define NEON_UPSAMPLE_FUNC(FUNC_NAME, FMT, XSTEP) \
static void FUNC_NAME(const uint8_t *top_y, const uint8_t *bottom_y, \
const uint8_t *top_u, const uint8_t *top_v, \
const uint8_t *cur_u, const uint8_t *cur_v, \
uint8_t *top_dst, uint8_t *bottom_dst, int len) { \
int block; \
/* 16 byte aligned array to cache reconstructed u and v */ \
uint8_t uv_buf[2 * 32 + 15]; \
uint8_t *const r_uv = (uint8_t*)((uintptr_t)(uv_buf + 15) & ~15); \
const int uv_len = (len + 1) >> 1; \
/* 9 pixels must be read-able for each block */ \
const int num_blocks = (uv_len - 1) >> 3; \
const int leftover = uv_len - num_blocks * 8; \
const int last_pos = 1 + 16 * num_blocks; \
\
const int u_diag = ((top_u[0] + cur_u[0]) >> 1) + 1; \
const int v_diag = ((top_v[0] + cur_v[0]) >> 1) + 1; \
\
const int16x4_t cf16 = vld1_s16(coef); \
const int32x2_t cf32 = vmov_n_s32(CY); \
const uint8x8_t u16 = vmov_n_u8(16); \
const uint8x8_t u128 = vmov_n_u8(128); \
\
/* Treat the first pixel in regular way */ \
if (top_y) { \
const int u0 = (top_u[0] + u_diag) >> 1; \
const int v0 = (top_v[0] + v_diag) >> 1; \
VP8YuvTo ## FMT(top_y[0], u0, v0, top_dst); \
} \
if (bottom_y) { \
const int u0 = (cur_u[0] + u_diag) >> 1; \
const int v0 = (cur_v[0] + v_diag) >> 1; \
VP8YuvTo ## FMT(bottom_y[0], u0, v0, bottom_dst); \
} \
\
for (block = 0; block < num_blocks; ++block) { \
UPSAMPLE_16PIXELS(top_u, cur_u, r_uv); \
UPSAMPLE_16PIXELS(top_v, cur_v, r_uv + 16); \
CONVERT2RGB_8(FMT, XSTEP, top_y, bottom_y, r_uv, \
top_dst, bottom_dst, 16 * block + 1, 16); \
top_u += 8; \
cur_u += 8; \
top_v += 8; \
cur_v += 8; \
} \
\
UPSAMPLE_LAST_BLOCK(top_u, cur_u, leftover, r_uv); \
UPSAMPLE_LAST_BLOCK(top_v, cur_v, leftover, r_uv + 16); \
CONVERT2RGB_1(FMT, XSTEP, top_y, bottom_y, r_uv, \
top_dst, bottom_dst, last_pos, len - last_pos); \
}
// NEON variants of the fancy upsampler, one per output format
// (3 bytes per pixel for Rgb/Bgr, 4 for Rgba/Bgra).
NEON_UPSAMPLE_FUNC(UpsampleRgbLinePairNEON, Rgb, 3)
NEON_UPSAMPLE_FUNC(UpsampleBgrLinePairNEON, Bgr, 3)
NEON_UPSAMPLE_FUNC(UpsampleRgbaLinePairNEON, Rgba, 4)
NEON_UPSAMPLE_FUNC(UpsampleBgraLinePairNEON, Bgra, 4)
#endif // FANCY_UPSAMPLING
#endif // WEBP_USE_NEON
//------------------------------------------------------------------------------
extern WebPUpsampleLinePairFunc WebPUpsamplers[/* MODE_LAST */];
// Installs the NEON line-pair upsamplers for the RGB, RGBA, BGR and BGRA
// output modes into WebPUpsamplers[].
// This is a no-op unless both WEBP_USE_NEON and FANCY_UPSAMPLING are defined:
// the Upsample*LinePairNEON functions are only generated inside the
// FANCY_UPSAMPLING section of this file, so referencing them under
// WEBP_USE_NEON alone would not compile when fancy upsampling is disabled.
void WebPInitUpsamplersNEON(void) {
#if defined(WEBP_USE_NEON) && defined(FANCY_UPSAMPLING)
  WebPUpsamplers[MODE_RGB] = UpsampleRgbLinePairNEON;
  WebPUpsamplers[MODE_RGBA] = UpsampleRgbaLinePairNEON;
  WebPUpsamplers[MODE_BGR] = UpsampleBgrLinePairNEON;
  WebPUpsamplers[MODE_BGRA] = UpsampleBgraLinePairNEON;
#endif  // WEBP_USE_NEON && FANCY_UPSAMPLING
}
// Installs the NEON line-pair upsamplers for the premultiplied-alpha modes
// (MODE_rgbA, MODE_bgrA) into WebPUpsamplers[]; the same functions as for
// the non-premultiplied RGBA/BGRA modes are used.
// This is a no-op unless both WEBP_USE_NEON and FANCY_UPSAMPLING are defined:
// the Upsample*LinePairNEON functions are only generated inside the
// FANCY_UPSAMPLING section of this file.
void WebPInitPremultiplyNEON(void) {
#if defined(WEBP_USE_NEON) && defined(FANCY_UPSAMPLING)
  WebPUpsamplers[MODE_rgbA] = UpsampleRgbaLinePairNEON;
  WebPUpsamplers[MODE_bgrA] = UpsampleBgraLinePairNEON;
#endif  // WEBP_USE_NEON && FANCY_UPSAMPLING
}
#if defined(__cplusplus) || defined(c_plusplus)
} // extern "C"
#endif

@ -51,12 +51,12 @@ extern "C" {
// pack and store two alternating pixel rows
#define PACK_AND_STORE(a, b, da, db, out) do { \
const __m128i ta = _mm_avg_epu8(a, da); /* (9a + 3b + 3c + d + 8) / 16 */ \
const __m128i tb = _mm_avg_epu8(b, db); /* (3a + 9b + c + 3d + 8) / 16 */ \
const __m128i t1 = _mm_unpacklo_epi8(ta, tb); \
const __m128i t2 = _mm_unpackhi_epi8(ta, tb); \
_mm_store_si128(((__m128i*)(out)) + 0, t1); \
_mm_store_si128(((__m128i*)(out)) + 1, t2); \
const __m128i t_a = _mm_avg_epu8(a, da); /* (9a + 3b + 3c + d + 8) / 16 */ \
const __m128i t_b = _mm_avg_epu8(b, db); /* (3a + 9b + c + 3d + 8) / 16 */ \
const __m128i t_1 = _mm_unpacklo_epi8(t_a, t_b); \
const __m128i t_2 = _mm_unpackhi_epi8(t_a, t_b); \
_mm_store_si128(((__m128i*)(out)) + 0, t_1); \
_mm_store_si128(((__m128i*)(out)) + 1, t_2); \
} while (0)
// Loads 17 pixels each from rows r1 and r2 and generates 32 pixels.
@ -128,7 +128,7 @@ static void FUNC_NAME(const uint8_t* top_y, const uint8_t* bottom_y, \
const uint8_t* top_u, const uint8_t* top_v, \
const uint8_t* cur_u, const uint8_t* cur_v, \
uint8_t* top_dst, uint8_t* bottom_dst, int len) { \
int b; \
int block; \
/* 16 byte aligned array to cache reconstructed u and v */ \
uint8_t uv_buf[4 * 32 + 15]; \
uint8_t* const r_uv = (uint8_t*)((uintptr_t)(uv_buf + 15) & ~15); \
@ -154,11 +154,11 @@ static void FUNC_NAME(const uint8_t* top_y, const uint8_t* bottom_y, \
FUNC(bottom_y[0], u0, v0, bottom_dst); \
} \
\
for (b = 0; b < num_blocks; ++b) { \
for (block = 0; block < num_blocks; ++block) { \
UPSAMPLE_32PIXELS(top_u, cur_u, r_uv + 0 * 32); \
UPSAMPLE_32PIXELS(top_v, cur_v, r_uv + 1 * 32); \
CONVERT2RGB(FUNC, XSTEP, top_y, bottom_y, r_uv, top_dst, bottom_dst, \
32 * b + 1, 32) \
32 * block + 1, 32) \
top_u += 16; \
cur_u += 16; \
top_v += 16; \
@ -211,3 +211,5 @@ void WebPInitPremultiplySSE2(void) {
#if defined(__cplusplus) || defined(c_plusplus)
} // extern "C"
#endif

@ -15,7 +15,7 @@
extern "C" {
#endif
enum { YUV_HALF = 1 << (YUV_FIX - 1) };
#ifdef WEBP_YUV_USE_TABLE
int16_t VP8kVToR[256], VP8kUToB[256];
int32_t VP8kVToG[256], VP8kUToG[256];
@ -33,6 +33,7 @@ void VP8YUVInit(void) {
if (done) {
return;
}
#ifndef USE_YUVj
for (i = 0; i < 256; ++i) {
VP8kVToR[i] = (89858 * (i - 128) + YUV_HALF) >> YUV_FIX;
VP8kUToG[i] = -22014 * (i - 128) + YUV_HALF;
@ -44,9 +45,29 @@ void VP8YUVInit(void) {
VP8kClip[i - YUV_RANGE_MIN] = clip(k, 255);
VP8kClip4Bits[i - YUV_RANGE_MIN] = clip((k + 8) >> 4, 15);
}
#else
for (i = 0; i < 256; ++i) {
VP8kVToR[i] = (91881 * (i - 128) + YUV_HALF) >> YUV_FIX;
VP8kUToG[i] = -22554 * (i - 128) + YUV_HALF;
VP8kVToG[i] = -46802 * (i - 128);
VP8kUToB[i] = (116130 * (i - 128) + YUV_HALF) >> YUV_FIX;
}
for (i = YUV_RANGE_MIN; i < YUV_RANGE_MAX; ++i) {
const int k = i;
VP8kClip[i - YUV_RANGE_MIN] = clip(k, 255);
VP8kClip4Bits[i - YUV_RANGE_MIN] = clip((k + 8) >> 4, 15);
}
#endif
done = 1;
}
#else
void VP8YUVInit(void) {}
#endif // WEBP_YUV_USE_TABLE
#if defined(__cplusplus) || defined(c_plusplus)
} // extern "C"
#endif

@ -7,6 +7,26 @@
//
// inline YUV<->RGB conversion function
//
// The exact naming is Y'CbCr, following the ITU-R BT.601 standard.
// More information at: http://en.wikipedia.org/wiki/YCbCr
// Y = 0.2569 * R + 0.5044 * G + 0.0979 * B + 16
// U = -0.1483 * R - 0.2911 * G + 0.4394 * B + 128
// V = 0.4394 * R - 0.3679 * G - 0.0715 * B + 128
// We use 16bit fixed point operations for RGB->YUV conversion.
//
// For the Y'CbCr to RGB conversion, the BT.601 specification reads:
// R = 1.164 * (Y-16) + 1.596 * (V-128)
// G = 1.164 * (Y-16) - 0.813 * (V-128) - 0.391 * (U-128)
// B = 1.164 * (Y-16) + 2.018 * (U-128)
// where Y is in the [16,235] range, and U/V in the [16,240] range.
// In the table-lookup version (WEBP_YUV_USE_TABLE), the common factor
// "1.164 * (Y-16)" can be handled as an offset in the VP8kClip[] table.
// So in this case the formulae should be read as:
// R = 1.164 * [Y + 1.371 * (V-128) ] - 18.624
// G = 1.164 * [Y - 0.698 * (V-128) - 0.336 * (U-128)] - 18.624
// B = 1.164 * [Y + 1.733 * (U-128)] - 18.624
// once factorized. Here too, 16bit fixed precision is used.
//
// Author: Skal (pascal.massimino@gmail.com)
#ifndef WEBP_DSP_YUV_H_
@ -14,6 +34,19 @@
#include "../dec/decode_vp8.h"
// Define the following to use the LUT-based code:
#define WEBP_YUV_USE_TABLE
#if defined(WEBP_EXPERIMENTAL_FEATURES)
// Do NOT activate this feature for real compression. This is only experimental!
// This flag is for comparison purpose against JPEG's "YUVj" natural colorspace.
// This colorspace is close to Rec.601's Y'CbCr model with the notable
// difference of allowing larger range for luma/chroma.
// See http://en.wikipedia.org/wiki/YCbCr#JPEG_conversion paragraph, and its
// difference with http://en.wikipedia.org/wiki/YCbCr#ITU-R_BT.601_conversion
// #define USE_YUVj
#endif
//------------------------------------------------------------------------------
// YUV -> RGB conversion
@ -22,9 +55,14 @@ extern "C" {
#endif
enum { YUV_FIX = 16, // fixed-point precision
YUV_HALF = 1 << (YUV_FIX - 1),
YUV_MASK = (256 << YUV_FIX) - 1,
YUV_RANGE_MIN = -227, // min value of r/g/b output
YUV_RANGE_MAX = 256 + 226 // max value of r/g/b output
};
#ifdef WEBP_YUV_USE_TABLE
extern int16_t VP8kVToR[256], VP8kUToB[256];
extern int32_t VP8kVToG[256], VP8kUToG[256];
extern uint8_t VP8kClip[YUV_RANGE_MAX - YUV_RANGE_MIN];
@ -40,21 +78,32 @@ static WEBP_INLINE void VP8YuvToRgb(uint8_t y, uint8_t u, uint8_t v,
rgb[2] = VP8kClip[y + b_off - YUV_RANGE_MIN];
}
static WEBP_INLINE void VP8YuvToRgb565(uint8_t y, uint8_t u, uint8_t v,
uint8_t* const rgb) {
static WEBP_INLINE void VP8YuvToBgr(uint8_t y, uint8_t u, uint8_t v,
uint8_t* const bgr) {
const int r_off = VP8kVToR[v];
const int g_off = (VP8kVToG[v] + VP8kUToG[u]) >> YUV_FIX;
const int b_off = VP8kUToB[u];
rgb[0] = ((VP8kClip[y + r_off - YUV_RANGE_MIN] & 0xf8) |
(VP8kClip[y + g_off - YUV_RANGE_MIN] >> 5));
rgb[1] = (((VP8kClip[y + g_off - YUV_RANGE_MIN] << 3) & 0xe0) |
(VP8kClip[y + b_off - YUV_RANGE_MIN] >> 3));
bgr[0] = VP8kClip[y + b_off - YUV_RANGE_MIN];
bgr[1] = VP8kClip[y + g_off - YUV_RANGE_MIN];
bgr[2] = VP8kClip[y + r_off - YUV_RANGE_MIN];
}
static WEBP_INLINE void VP8YuvToArgb(uint8_t y, uint8_t u, uint8_t v,
uint8_t* const argb) {
argb[0] = 0xff;
VP8YuvToRgb(y, u, v, argb + 1);
// Table-based conversion of one YUV sample to two bytes of RGB565.
// The first byte computed holds the 5 red bits and the upper 3 green bits,
// the second the lower 3 green bits and the 5 blue bits; the two bytes are
// written in swapped order when WEBP_SWAP_16BIT_CSP is defined.
static WEBP_INLINE void VP8YuvToRgb565(uint8_t y, uint8_t u, uint8_t v,
                                       uint8_t* const rgb) {
  const int red_delta = VP8kVToR[v];
  const int green_delta = (VP8kVToG[v] + VP8kUToG[u]) >> YUV_FIX;
  const int blue_delta = VP8kUToB[u];
  const int r8 = VP8kClip[y + red_delta - YUV_RANGE_MIN];
  const int g8 = VP8kClip[y + green_delta - YUV_RANGE_MIN];
  const int b8 = VP8kClip[y + blue_delta - YUV_RANGE_MIN];
  const uint8_t hi_byte = (uint8_t)((r8 & 0xf8) | (g8 >> 5));
  const uint8_t lo_byte = (uint8_t)(((g8 << 3) & 0xe0) | (b8 >> 3));
#ifdef WEBP_SWAP_16BIT_CSP
  rgb[0] = lo_byte;
  rgb[1] = hi_byte;
#else
  rgb[0] = hi_byte;
  rgb[1] = lo_byte;
#endif
}
static WEBP_INLINE void VP8YuvToRgba4444(uint8_t y, uint8_t u, uint8_t v,
@ -62,20 +111,104 @@ static WEBP_INLINE void VP8YuvToRgba4444(uint8_t y, uint8_t u, uint8_t v,
const int r_off = VP8kVToR[v];
const int g_off = (VP8kVToG[v] + VP8kUToG[u]) >> YUV_FIX;
const int b_off = VP8kUToB[u];
// Don't update alpha (last 4 bits of argb[1])
argb[0] = ((VP8kClip4Bits[y + r_off - YUV_RANGE_MIN] << 4) |
VP8kClip4Bits[y + g_off - YUV_RANGE_MIN]);
argb[1] = 0x0f | (VP8kClip4Bits[y + b_off - YUV_RANGE_MIN] << 4);
const uint8_t rg = ((VP8kClip4Bits[y + r_off - YUV_RANGE_MIN] << 4) |
VP8kClip4Bits[y + g_off - YUV_RANGE_MIN]);
const uint8_t ba = (VP8kClip4Bits[y + b_off - YUV_RANGE_MIN] << 4) | 0x0f;
#ifdef WEBP_SWAP_16BIT_CSP
argb[0] = ba;
argb[1] = rg;
#else
argb[0] = rg;
argb[1] = ba;
#endif
}
#else // Table-free version (slower on x86)
// These constants are 16b fixed-point version of ITU-R BT.601 constants
#define kYScale 76309 // 1.164 = 255 / 219
#define kVToR 104597 // 1.596 = 255 / 112 * 0.701
#define kUToG 25674 // 0.391 = 255 / 112 * 0.886 * 0.114 / 0.587
#define kVToG 53278 // 0.813 = 255 / 112 * 0.701 * 0.299 / 0.587
#define kUToB 132201 // 2.018 = 255 / 112 * 0.886
#define kRCst (-kYScale * 16 - kVToR * 128 + YUV_HALF)
#define kGCst (-kYScale * 16 + kUToG * 128 + kVToG * 128 + YUV_HALF)
#define kBCst (-kYScale * 16 - kUToB * 128 + YUV_HALF)
// Converts a YUV_FIX fixed-point value to an 8-bit sample. Values with bits
// set outside YUV_MASK saturate: negative ones to 0, the others to 255.
static WEBP_INLINE uint8_t VP8Clip8(int v) {
  if ((v & ~YUV_MASK) != 0) {
    return (v < 0) ? 0u : 255u;
  }
  return (uint8_t)(v >> YUV_FIX);
}
// Same as VP8Clip8(), but keeps only the N most significant bits of the 8-bit
// result: in-range values are shifted by YUV_FIX + (8 - N), and out-of-range
// values saturate to 0 or to 255 >> (8 - N).
static WEBP_INLINE uint8_t VP8ClipN(int v, int N) {  // clip to N bits
  const int dropped_bits = 8 - N;
  if ((v & ~YUV_MASK) != 0) {
    return (v < 0) ? 0u : (255u >> dropped_bits);
  }
  return (uint8_t)(v >> (YUV_FIX + dropped_bits));
}
// Red component in YUV_FIX fixed point (not clipped): luma and V terms plus
// the constant kRCst.
static WEBP_INLINE int VP8YUVToR(int y, int v) {
  const int luma_term = kYScale * y;
  const int v_term = kVToR * v;
  return luma_term + v_term + kRCst;
}
// Green component in YUV_FIX fixed point (not clipped): luma term minus the
// U and V terms, plus the constant kGCst.
static WEBP_INLINE int VP8YUVToG(int y, int u, int v) {
  const int luma_term = kYScale * y;
  const int u_term = kUToG * u;
  const int v_term = kVToG * v;
  return luma_term - u_term - v_term + kGCst;
}
// Blue component in YUV_FIX fixed point (not clipped): luma and U terms plus
// the constant kBCst.
static WEBP_INLINE int VP8YUVToB(int y, int u) {
  const int luma_term = kYScale * y;
  const int u_term = kUToB * u;
  return luma_term + u_term + kBCst;
}
// Table-free conversion of one YUV sample to three bytes, written to rgb[]
// in R, G, B order.
static WEBP_INLINE void VP8YuvToRgb(uint8_t y, uint8_t u, uint8_t v,
                                    uint8_t* const rgb) {
  const uint8_t red = VP8Clip8(VP8YUVToR(y, v));
  const uint8_t green = VP8Clip8(VP8YUVToG(y, u, v));
  const uint8_t blue = VP8Clip8(VP8YUVToB(y, u));
  rgb[0] = red;
  rgb[1] = green;
  rgb[2] = blue;
}
static WEBP_INLINE void VP8YuvToBgr(uint8_t y, uint8_t u, uint8_t v,
uint8_t* const bgr) {
const int r_off = VP8kVToR[v];
const int g_off = (VP8kVToG[v] + VP8kUToG[u]) >> YUV_FIX;
const int b_off = VP8kUToB[u];
bgr[0] = VP8kClip[y + b_off - YUV_RANGE_MIN];
bgr[1] = VP8kClip[y + g_off - YUV_RANGE_MIN];
bgr[2] = VP8kClip[y + r_off - YUV_RANGE_MIN];
bgr[0] = VP8Clip8(VP8YUVToB(y, u));
bgr[1] = VP8Clip8(VP8YUVToG(y, u, v));
bgr[2] = VP8Clip8(VP8YUVToR(y, v));
}
// Table-free conversion of one YUV sample to two bytes of RGB565.
// 'rg' holds the 5 red bits and the upper 3 green bits, 'gb' the lower 3
// green bits and the 5 blue bits; the two bytes are written in swapped order
// when WEBP_SWAP_16BIT_CSP is defined.
static WEBP_INLINE void VP8YuvToRgb565(uint8_t y, uint8_t u, uint8_t v,
                                       uint8_t* const rgb) {
  // Red is derived from V and blue from U, as declared by VP8YUVToR(y, v) and
  // VP8YUVToB(y, u) and as done in VP8YuvToRgb().
  const int r = VP8Clip8(VP8YUVToR(y, v));
  const int g = VP8ClipN(VP8YUVToG(y, u, v), 6);   // 6 bits
  const int b = VP8ClipN(VP8YUVToB(y, u), 5);      // 5 bits
  const uint8_t rg = (uint8_t)((r & 0xf8) | (g >> 3));
  // Only the 3 low green bits belong in this byte: mask explicitly rather
  // than relying on the narrowing conversion.
  const uint8_t gb = (uint8_t)(((g << 5) | b) & 0xff);
#ifdef WEBP_SWAP_16BIT_CSP
  rgb[0] = gb;
  rgb[1] = rg;
#else
  rgb[0] = rg;
  rgb[1] = gb;
#endif
}
// Table-free conversion of one YUV sample to two bytes of RGBA4444 with the
// alpha nibble forced to 0xf.
// 'rg' holds the red and green nibbles, 'ba' the blue nibble and alpha; the
// two bytes are written in swapped order when WEBP_SWAP_16BIT_CSP is defined.
static WEBP_INLINE void VP8YuvToRgba4444(uint8_t y, uint8_t u, uint8_t v,
                                         uint8_t* const argb) {
  // Red is derived from V and blue from U, as declared by VP8YUVToR(y, v) and
  // VP8YUVToB(y, u) and as done in VP8YuvToRgb().
  const int r = VP8Clip8(VP8YUVToR(y, v));
  const int g = VP8ClipN(VP8YUVToG(y, u, v), 4);   // 4 bits
  const int b = VP8Clip8(VP8YUVToB(y, u));
  const uint8_t rg = (uint8_t)((r & 0xf0) | g);
  const uint8_t ba = (uint8_t)(b | 0x0f);   // overwrite the lower 4 bits
#ifdef WEBP_SWAP_16BIT_CSP
  argb[0] = ba;
  argb[1] = rg;
#else
  argb[0] = rg;
  argb[1] = ba;
#endif
}
#endif // WEBP_YUV_USE_TABLE
// Converts one YUV sample to four bytes: 0xff in argb[0], followed by the
// three bytes produced by VP8YuvToRgb() in argb[1..3].
static WEBP_INLINE void VP8YuvToArgb(uint8_t y, uint8_t u, uint8_t v,
                                     uint8_t* const argb) {
  uint8_t* const color = &argb[1];
  VP8YuvToRgb(y, u, v, color);
  argb[0] = 0xff;
}
static WEBP_INLINE void VP8YuvToBgra(uint8_t y, uint8_t u, uint8_t v,
@ -95,18 +228,14 @@ void VP8YUVInit(void);
//------------------------------------------------------------------------------
// RGB -> YUV conversion
// The exact naming is Y'CbCr, following the ITU-R BT.601 standard.
// More information at: http://en.wikipedia.org/wiki/YCbCr
// Y = 0.2569 * R + 0.5044 * G + 0.0979 * B + 16
// U = -0.1483 * R - 0.2911 * G + 0.4394 * B + 128
// V = 0.4394 * R - 0.3679 * G - 0.0715 * B + 128
// We use 16bit fixed point operations.
static WEBP_INLINE int VP8ClipUV(int v) {
v = (v + (257 << (YUV_FIX + 2 - 1))) >> (YUV_FIX + 2);
return ((v & ~0xff) == 0) ? v : (v < 0) ? 0 : 255;
v = (v + (257 << (YUV_FIX + 2 - 1))) >> (YUV_FIX + 2);
return ((v & ~0xff) == 0) ? v : (v < 0) ? 0 : 255;
}
#ifndef USE_YUVj
static WEBP_INLINE int VP8RGBToY(int r, int g, int b) {
const int kRound = (1 << (YUV_FIX - 1)) + (16 << YUV_FIX);
const int luma = 16839 * r + 33059 * g + 6420 * b;
@ -114,13 +243,38 @@ static WEBP_INLINE int VP8RGBToY(int r, int g, int b) {
}
static WEBP_INLINE int VP8RGBToU(int r, int g, int b) {
return VP8ClipUV(-9719 * r - 19081 * g + 28800 * b);
const int u = -9719 * r - 19081 * g + 28800 * b;
return VP8ClipUV(u);
}
static WEBP_INLINE int VP8RGBToV(int r, int g, int b) {
return VP8ClipUV(+28800 * r - 24116 * g - 4684 * b);
const int v = +28800 * r - 24116 * g - 4684 * b;
return VP8ClipUV(v);
}
#else
// JPEG-style "YUVj" variant, for comparison only.
// The coefficients are also 16-bit precision values from Rec.601, but with a
// full [0..255] output range.
static WEBP_INLINE int VP8RGBToY(int r, int g, int b) {
  const int weighted_sum = 19595 * r + 38470 * g + 7471 * b;
  const int rounding = 1 << (YUV_FIX - 1);
  return (weighted_sum + rounding) >> YUV_FIX;  // no need to clip
}
// YUVj variant: weighted R/G/B sum for U, rounded and clipped by VP8ClipUV().
static WEBP_INLINE int VP8RGBToU(int r, int g, int b) {
  return VP8ClipUV(-11058 * r - 21710 * g + 32768 * b);
}
// YUVj variant: weighted R/G/B sum for V, rounded and clipped by VP8ClipUV().
static WEBP_INLINE int VP8RGBToV(int r, int g, int b) {
  return VP8ClipUV(32768 * r - 27439 * g - 5329 * b);
}
#endif // USE_YUVj
#if defined(__cplusplus) || defined(c_plusplus)
} // extern "C"
#endif

@ -79,18 +79,17 @@ static int EncodeLossless(const uint8_t* const data, int width, int height,
WebPConfigInit(&config);
config.lossless = 1;
config.method = effort_level; // impact is very small
// Set moderate default quality setting for alpha. Higher qualities (80 and
// above) could be very slow.
config.quality = 10.f + 15.f * effort_level;
if (config.quality > 100.f) config.quality = 100.f;
// Set a moderate default quality setting for alpha.
config.quality = 5.f * effort_level;
assert(config.quality >= 0 && config.quality <= 100.f);
ok = VP8LBitWriterInit(&tmp_bw, (width * height) >> 3);
ok = ok && (VP8LEncodeStream(&config, &picture, &tmp_bw) == VP8_ENC_OK);
WebPPictureFree(&picture);
if (ok) {
const uint8_t* const data = VP8LBitWriterFinish(&tmp_bw);
const size_t data_size = VP8LBitWriterNumBytes(&tmp_bw);
VP8BitWriterAppend(bw, data, data_size);
const uint8_t* const buffer = VP8LBitWriterFinish(&tmp_bw);
const size_t buffer_size = VP8LBitWriterNumBytes(&tmp_bw);
VP8BitWriterAppend(bw, buffer, buffer_size);
}
VP8LBitWriterDestroy(&tmp_bw);
return ok && !bw->error_;
@ -128,8 +127,8 @@ static int EncodeAlphaInternal(const uint8_t* const data, int width, int height,
VP8BitWriterAppend(bw, &header, ALPHA_HEADER_LEN);
filter_func = WebPFilters[filter];
if (filter_func) {
filter_func(data, width, height, 1, width, tmp_alpha);
if (filter_func != NULL) {
filter_func(data, width, height, width, tmp_alpha);
alpha_src = tmp_alpha;
} else {
alpha_src = data;
@ -287,42 +286,80 @@ static int EncodeAlpha(VP8Encoder* const enc,
//------------------------------------------------------------------------------
// Main calls
// Compresses the alpha plane of 'enc' and stores the result in
// enc->alpha_data_ / enc->alpha_data_size_. The signature has an extra,
// unused 'dummy' argument because the function is installed as a worker hook
// by VP8EncInitAlpha().
// Returns 1 on success, and 0 if EncodeAlpha() fails or if the compressed
// size does not fit in 32 bits (the buffer is freed in that case).
static int CompressAlphaJob(VP8Encoder* const enc, void* dummy) {
  const WebPConfig* const cfg = enc->config_;
  WEBP_FILTER_TYPE filter_type;
  uint8_t* compressed = NULL;
  size_t compressed_size = 0;
  (void)dummy;

  switch (cfg->alpha_filtering) {
    case 0:
      filter_type = WEBP_FILTER_NONE;
      break;
    case 1:
      filter_type = WEBP_FILTER_FAST;
      break;
    default:
      filter_type = WEBP_FILTER_BEST;
      break;
  }

  // config->method is passed through as the effort level; maps to [0..6].
  if (!EncodeAlpha(enc, cfg->alpha_quality, cfg->alpha_compression,
                   filter_type, cfg->method,
                   &compressed, &compressed_size)) {
    return 0;
  }
  if (compressed_size != (uint32_t)compressed_size) {  // Sanity check.
    free(compressed);
    return 0;
  }
  enc->alpha_data_ = compressed;
  enc->alpha_data_size_ = (uint32_t)compressed_size;
  return 1;
}
// Resets the encoder's alpha state: records whether the picture has
// transparency and clears the compressed-alpha buffer fields. When
// thread_level_ is positive, the alpha worker is also initialized and set up
// to run CompressAlphaJob() on 'enc'.
void VP8EncInitAlpha(VP8Encoder* const enc) {
  WebPWorker* alpha_worker;

  enc->has_alpha_ = WebPPictureHasTransparency(enc->pic_);
  enc->alpha_data_ = NULL;
  enc->alpha_data_size_ = 0;
  if (enc->thread_level_ <= 0) {
    return;
  }
  alpha_worker = &enc->alpha_worker_;
  WebPWorkerInit(alpha_worker);
  alpha_worker->data1 = enc;
  alpha_worker->data2 = NULL;
  alpha_worker->hook = (WebPWorkerHook)CompressAlphaJob;
}
int VP8EncFinishAlpha(VP8Encoder* const enc) {
int VP8EncStartAlpha(VP8Encoder* const enc) {
if (enc->has_alpha_) {
const WebPConfig* config = enc->config_;
uint8_t* tmp_data = NULL;
size_t tmp_size = 0;
const int effort_level = config->method; // maps to [0..6]
const WEBP_FILTER_TYPE filter =
(config->alpha_filtering == 0) ? WEBP_FILTER_NONE :
(config->alpha_filtering == 1) ? WEBP_FILTER_FAST :
WEBP_FILTER_BEST;
if (!EncodeAlpha(enc, config->alpha_quality, config->alpha_compression,
filter, effort_level, &tmp_data, &tmp_size)) {
return 0;
if (enc->thread_level_ > 0) {
WebPWorker* const worker = &enc->alpha_worker_;
if (!WebPWorkerReset(worker)) { // Makes sure worker is good to go.
return 0;
}
WebPWorkerLaunch(worker);
return 1;
} else {
return CompressAlphaJob(enc, NULL); // just do the job right away
}
if (tmp_size != (uint32_t)tmp_size) { // Sanity check.
free(tmp_data);
return 0;
}
return 1;
}
int VP8EncFinishAlpha(VP8Encoder* const enc) {
if (enc->has_alpha_) {
if (enc->thread_level_ > 0) {
WebPWorker* const worker = &enc->alpha_worker_;
if (!WebPWorkerSync(worker)) return 0; // error
}
enc->alpha_data_size_ = (uint32_t)tmp_size;
enc->alpha_data_ = tmp_data;
}
return WebPReportProgress(enc->pic_, enc->percent_ + 20, &enc->percent_);
}
void VP8EncDeleteAlpha(VP8Encoder* const enc) {
// Releases the encoder's alpha resources. When thread_level_ is positive the
// alpha worker is synced and then ended; the compressed-alpha buffer is freed
// and the alpha fields are cleared in all cases.
// Returns the result of WebPWorkerSync() when a worker is in use, 1 otherwise.
int VP8EncDeleteAlpha(VP8Encoder* const enc) {
  int status = 1;
  if (enc->thread_level_ > 0) {
    // finish anything left in flight
    status = WebPWorkerSync(&enc->alpha_worker_);
    // still need to end the worker, even if the sync failed
    WebPWorkerEnd(&enc->alpha_worker_);
  }
  free(enc->alpha_data_);
  enc->alpha_data_ = NULL;
  enc->alpha_data_size_ = 0;
  enc->has_alpha_ = 0;
  return status;
}
#if defined(__cplusplus) || defined(c_plusplus)

@ -23,10 +23,6 @@ extern "C" {
#define MAX_ITERS_K_MEANS 6
static int ClipAlpha(int alpha) {
return alpha < 0 ? 0 : alpha > 255 ? 255 : alpha;
}
//------------------------------------------------------------------------------
// Smooth the segment map by replacing isolated block by the majority of its
// neighbours.
@ -72,50 +68,10 @@ static void SmoothSegmentMap(VP8Encoder* const enc) {
}
//------------------------------------------------------------------------------
// Finalize Segment probability based on the coding tree
static int GetProba(int a, int b) {
int proba;
const int total = a + b;
if (total == 0) return 255; // that's the default probability.
proba = (255 * a + total / 2) / total;
return proba;
}
static void SetSegmentProbas(VP8Encoder* const enc) {
int p[NUM_MB_SEGMENTS] = { 0 };
int n;
for (n = 0; n < enc->mb_w_ * enc->mb_h_; ++n) {
const VP8MBInfo* const mb = &enc->mb_info_[n];
p[mb->segment_]++;
}
if (enc->pic_->stats) {
for (n = 0; n < NUM_MB_SEGMENTS; ++n) {
enc->pic_->stats->segment_size[n] = p[n];
}
}
if (enc->segment_hdr_.num_segments_ > 1) {
uint8_t* const probas = enc->proba_.segments_;
probas[0] = GetProba(p[0] + p[1], p[2] + p[3]);
probas[1] = GetProba(p[0], p[1]);
probas[2] = GetProba(p[2], p[3]);
enc->segment_hdr_.update_map_ =
(probas[0] != 255) || (probas[1] != 255) || (probas[2] != 255);
enc->segment_hdr_.size_ =
p[0] * (VP8BitCost(0, probas[0]) + VP8BitCost(0, probas[1])) +
p[1] * (VP8BitCost(0, probas[0]) + VP8BitCost(1, probas[1])) +
p[2] * (VP8BitCost(1, probas[0]) + VP8BitCost(0, probas[2])) +
p[3] * (VP8BitCost(1, probas[0]) + VP8BitCost(1, probas[2]));
} else {
enc->segment_hdr_.update_map_ = 0;
enc->segment_hdr_.size_ = 0;
}
}
// set segment susceptibility alpha_ / beta_
static WEBP_INLINE int clip(int v, int m, int M) {
return v < m ? m : v > M ? M : v;
return (v < m) ? m : (v > M) ? M : v;
}
static void SetSegmentAlphas(VP8Encoder* const enc,
@ -141,23 +97,64 @@ static void SetSegmentAlphas(VP8Encoder* const enc,
}
}
//------------------------------------------------------------------------------
// Compute susceptibility based on DCT-coeff histograms:
// the higher, the "easier" the macroblock is to compress.
#define MAX_ALPHA 255 // 8b of precision for susceptibilities.
#define ALPHA_SCALE (2 * MAX_ALPHA) // scaling factor for alpha.
#define DEFAULT_ALPHA (-1)
#define IS_BETTER_ALPHA(alpha, best_alpha) ((alpha) > (best_alpha))
// Flips 'alpha' around MAX_ALPHA and clips the result to [0, MAX_ALPHA].
static int FinalAlphaValue(int alpha) {
  const int flipped = MAX_ALPHA - alpha;
  return clip(flipped, 0, MAX_ALPHA);
}
// Derives an alpha value from a coefficient histogram: ALPHA_SCALE times the
// index of the last non-empty bin, divided by the largest bin count. Returns 0
// when the largest count is not greater than 1.
static int GetAlpha(const VP8Histogram* const histo) {
  int peak_count = 0;
  int last_used_bin = 1;
  int bin;
  for (bin = 0; bin <= MAX_COEFF_THRESH; ++bin) {
    const int count = histo->distribution[bin];
    if (count <= 0) continue;
    last_used_bin = bin;
    if (count > peak_count) peak_count = count;
  }
  // The result is clipped to [0..MAX_ALPHA] later on, which clamps the outer
  // values (mostly noise) and leaves the maximum precision for the useful
  // small values which contribute most.
  if (peak_count > 1) {
    return ALPHA_SCALE * last_used_bin / peak_count;
  }
  return 0;
}
// Adds every bin of 'in' to the corresponding bin of 'out'.
static void MergeHistograms(const VP8Histogram* const in,
                            VP8Histogram* const out) {
  int bin = MAX_COEFF_THRESH;
  for (; bin >= 0; --bin) {
    out->distribution[bin] += in->distribution[bin];
  }
}
//------------------------------------------------------------------------------
// Simplified k-Means, to assign Nb segments based on alpha-histogram
static void AssignSegments(VP8Encoder* const enc, const int alphas[256]) {
static void AssignSegments(VP8Encoder* const enc,
const int alphas[MAX_ALPHA + 1]) {
const int nb = enc->segment_hdr_.num_segments_;
int centers[NUM_MB_SEGMENTS];
int weighted_average = 0;
int map[256];
int map[MAX_ALPHA + 1];
int a, n, k;
int min_a = 0, max_a = 255, range_a;
int min_a = 0, max_a = MAX_ALPHA, range_a;
// 'int' type is ok for histo, and won't overflow
int accum[NUM_MB_SEGMENTS], dist_accum[NUM_MB_SEGMENTS];
// bracket the input
for (n = 0; n < 256 && alphas[n] == 0; ++n) {}
for (n = 0; n <= MAX_ALPHA && alphas[n] == 0; ++n) {}
min_a = n;
for (n = 255; n > min_a && alphas[n] == 0; --n) {}
for (n = MAX_ALPHA; n > min_a && alphas[n] == 0; --n) {}
max_a = n;
range_a = max_a - min_a;
@ -210,7 +207,7 @@ static void AssignSegments(VP8Encoder* const enc, const int alphas[256]) {
VP8MBInfo* const mb = &enc->mb_info_[n];
const int alpha = mb->alpha_;
mb->segment_ = map[alpha];
mb->alpha_ = centers[map[alpha]]; // just for the record.
mb->alpha_ = centers[map[alpha]]; // for the record.
}
if (nb > 1) {
@ -218,7 +215,6 @@ static void AssignSegments(VP8Encoder* const enc, const int alphas[256]) {
if (smooth) SmoothSegmentMap(enc);
}
SetSegmentProbas(enc); // Assign final proba
SetSegmentAlphas(enc, centers, weighted_average); // pick some alphas.
}
@ -227,24 +223,32 @@ static void AssignSegments(VP8Encoder* const enc, const int alphas[256]) {
// susceptibility and set best modes for this macroblock.
// Segment assignment is done later.
// Number of modes to inspect for alpha_ evaluation. For high-quality settings,
// we don't need to test all the possible modes during the analysis phase.
// Number of modes to inspect for alpha_ evaluation. For high-quality settings
// (method >= FAST_ANALYSIS_METHOD) we don't need to test all the possible modes
// during the analysis phase.
#define FAST_ANALYSIS_METHOD 4 // method above which we do partial analysis
#define MAX_INTRA16_MODE 2
#define MAX_INTRA4_MODE 2
#define MAX_UV_MODE 2
static int MBAnalyzeBestIntra16Mode(VP8EncIterator* const it) {
const int max_mode = (it->enc_->method_ >= 3) ? MAX_INTRA16_MODE : 4;
const int max_mode =
(it->enc_->method_ >= FAST_ANALYSIS_METHOD) ? MAX_INTRA16_MODE
: NUM_PRED_MODES;
int mode;
int best_alpha = -1;
int best_alpha = DEFAULT_ALPHA;
int best_mode = 0;
VP8MakeLuma16Preds(it);
for (mode = 0; mode < max_mode; ++mode) {
const int alpha = VP8CollectHistogram(it->yuv_in_ + Y_OFF,
it->yuv_p_ + VP8I16ModeOffsets[mode],
0, 16);
if (alpha > best_alpha) {
VP8Histogram histo = { { 0 } };
int alpha;
VP8CollectHistogram(it->yuv_in_ + Y_OFF,
it->yuv_p_ + VP8I16ModeOffsets[mode],
0, 16, &histo);
alpha = GetAlpha(&histo);
if (IS_BETTER_ALPHA(alpha, best_alpha)) {
best_alpha = alpha;
best_mode = mode;
}
@ -256,46 +260,63 @@ static int MBAnalyzeBestIntra16Mode(VP8EncIterator* const it) {
static int MBAnalyzeBestIntra4Mode(VP8EncIterator* const it,
int best_alpha) {
uint8_t modes[16];
const int max_mode = (it->enc_->method_ >= 3) ? MAX_INTRA4_MODE : NUM_BMODES;
int i4_alpha = 0;
const int max_mode =
(it->enc_->method_ >= FAST_ANALYSIS_METHOD) ? MAX_INTRA4_MODE
: NUM_BMODES;
int i4_alpha;
VP8Histogram total_histo = { { 0 } };
int cur_histo = 0;
VP8IteratorStartI4(it);
do {
int mode;
int best_mode_alpha = -1;
int best_mode_alpha = DEFAULT_ALPHA;
VP8Histogram histos[2];
const uint8_t* const src = it->yuv_in_ + Y_OFF + VP8Scan[it->i4_];
VP8MakeIntra4Preds(it);
for (mode = 0; mode < max_mode; ++mode) {
const int alpha = VP8CollectHistogram(src,
it->yuv_p_ + VP8I4ModeOffsets[mode],
0, 1);
if (alpha > best_mode_alpha) {
int alpha;
memset(&histos[cur_histo], 0, sizeof(histos[cur_histo]));
VP8CollectHistogram(src, it->yuv_p_ + VP8I4ModeOffsets[mode],
0, 1, &histos[cur_histo]);
alpha = GetAlpha(&histos[cur_histo]);
if (IS_BETTER_ALPHA(alpha, best_mode_alpha)) {
best_mode_alpha = alpha;
modes[it->i4_] = mode;
cur_histo ^= 1; // keep track of best histo so far.
}
}
i4_alpha += best_mode_alpha;
// accumulate best histogram
MergeHistograms(&histos[cur_histo ^ 1], &total_histo);
// Note: we reuse the original samples for predictors
} while (VP8IteratorRotateI4(it, it->yuv_in_ + Y_OFF));
if (i4_alpha > best_alpha) {
i4_alpha = GetAlpha(&total_histo);
if (IS_BETTER_ALPHA(i4_alpha, best_alpha)) {
VP8SetIntra4Mode(it, modes);
best_alpha = ClipAlpha(i4_alpha);
best_alpha = i4_alpha;
}
return best_alpha;
}
static int MBAnalyzeBestUVMode(VP8EncIterator* const it) {
int best_alpha = -1;
int best_alpha = DEFAULT_ALPHA;
int best_mode = 0;
const int max_mode = (it->enc_->method_ >= 3) ? MAX_UV_MODE : 4;
const int max_mode =
(it->enc_->method_ >= FAST_ANALYSIS_METHOD) ? MAX_UV_MODE
: NUM_PRED_MODES;
int mode;
VP8MakeChroma8Preds(it);
for (mode = 0; mode < max_mode; ++mode) {
const int alpha = VP8CollectHistogram(it->yuv_in_ + U_OFF,
it->yuv_p_ + VP8UVModeOffsets[mode],
16, 16 + 4 + 4);
if (alpha > best_alpha) {
VP8Histogram histo = { { 0 } };
int alpha;
VP8CollectHistogram(it->yuv_in_ + U_OFF,
it->yuv_p_ + VP8UVModeOffsets[mode],
16, 16 + 4 + 4, &histo);
alpha = GetAlpha(&histo);
if (IS_BETTER_ALPHA(alpha, best_alpha)) {
best_alpha = alpha;
best_mode = mode;
}
@ -305,7 +326,8 @@ static int MBAnalyzeBestUVMode(VP8EncIterator* const it) {
}
static void MBAnalyze(VP8EncIterator* const it,
int alphas[256], int* const uv_alpha) {
int alphas[MAX_ALPHA + 1],
int* const alpha, int* const uv_alpha) {
const VP8Encoder* const enc = it->enc_;
int best_alpha, best_uv_alpha;
@ -314,7 +336,7 @@ static void MBAnalyze(VP8EncIterator* const it,
VP8SetSegment(it, 0); // default segment, spec-wise.
best_alpha = MBAnalyzeBestIntra16Mode(it);
if (enc->method_ != 3) {
if (enc->method_ >= 5) {
// We go and make a fast decision for intra4/intra16.
// It's usually not a good and definitive pick, but helps seeding the stats
// about level bit-cost.
@ -324,10 +346,22 @@ static void MBAnalyze(VP8EncIterator* const it,
best_uv_alpha = MBAnalyzeBestUVMode(it);
// Final susceptibility mix
best_alpha = (best_alpha + best_uv_alpha + 1) / 2;
best_alpha = (3 * best_alpha + best_uv_alpha + 2) >> 2;
best_alpha = FinalAlphaValue(best_alpha);
alphas[best_alpha]++;
it->mb_->alpha_ = best_alpha; // for later remapping.
// Accumulate for later complexity analysis.
*alpha += best_alpha; // mixed susceptibility (not just luma)
*uv_alpha += best_uv_alpha;
it->mb_->alpha_ = best_alpha; // Informative only.
}
// Puts one macroblock info record into its default state: intra16x16 type,
// UV mode 0, not skipped, segment 0 and a zero alpha value.
static void DefaultMBInfo(VP8MBInfo* const mb) {
  mb->alpha_ = 0;
  mb->segment_ = 0;  // default segment
  mb->skip_ = 0;     // not skipped
  mb->uv_mode_ = 0;
  mb->type_ = 1;     // I16x16
}
//------------------------------------------------------------------------------
@ -340,22 +374,43 @@ static void MBAnalyze(VP8EncIterator* const it,
// and decide intra4/intra16, but that's usually almost always a bad choice at
// this stage.
// Resets every macroblock of the picture to the default info, clears the
// susceptibilities of segment #0 and reports 20 percent of progress.
static void ResetAllMBInfo(VP8Encoder* const enc) {
  const int num_mbs = enc->mb_w_ * enc->mb_h_;
  int n = 0;
  while (n < num_mbs) {
    DefaultMBInfo(&enc->mb_info_[n]);
    ++n;
  }
  // Default susceptibilities.
  enc->dqm_[0].beta_ = 0;
  enc->dqm_[0].alpha_ = 0;
  // Note: enc->alpha_ / enc->uv_alpha_ can't be computed on this path.
  WebPReportProgress(enc->pic_, enc->percent_ + 20, &enc->percent_);
}
int VP8EncAnalyze(VP8Encoder* const enc) {
int ok = 1;
int alphas[256] = { 0 };
VP8EncIterator it;
VP8IteratorInit(enc, &it);
const int do_segments =
enc->config_->emulate_jpeg_size || // We need the complexity evaluation.
(enc->segment_hdr_.num_segments_ > 1) ||
(enc->method_ == 0); // for method 0, we need preds_[] to be filled.
enc->alpha_ = 0;
enc->uv_alpha_ = 0;
do {
VP8IteratorImport(&it);
MBAnalyze(&it, alphas, &enc->uv_alpha_);
ok = VP8IteratorProgress(&it, 20);
// Let's pretend we have perfect lossless reconstruction.
} while (ok && VP8IteratorNext(&it, it.yuv_in_));
enc->uv_alpha_ /= enc->mb_w_ * enc->mb_h_;
if (ok) AssignSegments(enc, alphas);
if (do_segments) {
int alphas[MAX_ALPHA + 1] = { 0 };
VP8EncIterator it;
VP8IteratorInit(enc, &it);
do {
VP8IteratorImport(&it);
MBAnalyze(&it, alphas, &enc->alpha_, &enc->uv_alpha_);
ok = VP8IteratorProgress(&it, 20);
// Let's pretend we have perfect lossless reconstruction.
} while (ok && VP8IteratorNext(&it, it.yuv_in_));
enc->alpha_ /= enc->mb_w_ * enc->mb_h_;
enc->uv_alpha_ /= enc->mb_w_ * enc->mb_h_;
if (ok) AssignSegments(enc, alphas);
} else { // Use only one default segment.
ResetAllMBInfo(enc);
}
return ok;
}

@ -141,21 +141,35 @@ static void HashChainInsert(HashChain* const p,
p->hash_to_first_index_[hash_code] = pos;
}
// Derives the hash-chain search parameters from the encoding 'quality'.
//   *window_size: backward-reference window, never larger than WINDOW_SIZE
//                 and scaled with 'xsize' for qualities <= 50.
//   *iter_pos:    initial value of the search iteration counter.
//   *iter_limit:  negative bound that the iteration counter is compared to.
// 'xsize' must be strictly positive.
static void GetParamsForHashChainFindCopy(int quality, int xsize,
                                          int* window_size, int* iter_pos,
                                          int* iter_limit) {
  int iter_mult = 1;
  int max_window_size;
  if (quality >= 27) {
    iter_mult += (quality - 27) >> 4;
  }
  // Limit the backward-ref window size for lower qualities.
  if (quality > 50) {
    max_window_size = WINDOW_SIZE;
  } else if (quality > 25) {
    max_window_size = xsize << 8;
  } else {
    max_window_size = xsize << 4;
  }
  assert(xsize > 0);
  if (max_window_size > WINDOW_SIZE) {
    max_window_size = WINDOW_SIZE;
  }
  *window_size = max_window_size;
  *iter_pos = 5 + (quality >> 3);
  *iter_limit = -quality * iter_mult;
}
static int HashChainFindCopy(const HashChain* const p,
int quality, int index, int xsize,
int base_position, int xsize,
const uint32_t* const argb, int maxlen,
int window_size, int iter_pos, int iter_limit,
int* const distance_ptr,
int* const length_ptr) {
const uint64_t hash_code = GetPixPairHash64(&argb[index]);
const uint64_t hash_code = GetPixPairHash64(&argb[base_position]);
int prev_length = 0;
int64_t best_val = 0;
int best_length = 0;
int best_distance = 0;
const uint32_t* const argb_start = argb + index;
const int iter_min_mult = (quality < 50) ? 2 : (quality < 75) ? 4 : 8;
const int iter_min = -quality * iter_min_mult;
int iter_cnt = 10 + (quality >> 1);
const int min_pos = (index > WINDOW_SIZE) ? index - WINDOW_SIZE : 0;
const uint32_t* const argb_start = argb + base_position;
const int min_pos =
(base_position > window_size) ? base_position - window_size : 0;
int pos;
assert(xsize > 0);
@ -164,12 +178,12 @@ static int HashChainFindCopy(const HashChain* const p,
pos = p->chain_[pos]) {
int64_t val;
int curr_length;
if (iter_cnt < 0) {
if (iter_cnt < iter_min || best_val >= 0xff0000) {
if (iter_pos < 0) {
if (iter_pos < iter_limit || best_val >= 0xff0000) {
break;
}
}
--iter_cnt;
--iter_pos;
if (best_length != 0 &&
argb[pos + best_length - 1] != argb_start[best_length - 1]) {
continue;
@ -180,9 +194,9 @@ static int HashChainFindCopy(const HashChain* const p,
}
val = 65536 * curr_length;
// Favoring 2d locality here gives savings for certain images.
if (index - pos < 9 * xsize) {
const int y = (index - pos) / xsize;
int x = (index - pos) % xsize;
if (base_position - pos < 9 * xsize) {
const int y = (base_position - pos) / xsize;
int x = (base_position - pos) % xsize;
if (x > xsize / 2) {
x = xsize - x;
}
@ -198,7 +212,7 @@ static int HashChainFindCopy(const HashChain* const p,
prev_length = curr_length;
best_val = val;
best_length = curr_length;
best_distance = index - pos;
best_distance = base_position - pos;
if (curr_length >= MAX_LENGTH) {
break;
}
@ -257,6 +271,9 @@ static int BackwardReferencesHashChain(int xsize, int ysize,
const int pix_count = xsize * ysize;
HashChain* const hash_chain = (HashChain*)malloc(sizeof(*hash_chain));
VP8LColorCache hashers;
int window_size = WINDOW_SIZE;
int iter_pos = 1;
int iter_limit = -1;
if (hash_chain == NULL) return 0;
if (use_color_cache) {
@ -267,6 +284,8 @@ static int BackwardReferencesHashChain(int xsize, int ysize,
if (!HashChainInit(hash_chain, pix_count)) goto Error;
refs->size = 0;
GetParamsForHashChainFindCopy(quality, xsize, &window_size, &iter_pos,
&iter_limit);
for (i = 0; i < pix_count; ) {
// Alternative#1: Code the pixels starting at 'i' using backward reference.
int offset = 0;
@ -276,7 +295,8 @@ static int BackwardReferencesHashChain(int xsize, int ysize,
if (maxlen > MAX_LENGTH) {
maxlen = MAX_LENGTH;
}
HashChainFindCopy(hash_chain, quality, i, xsize, argb, maxlen,
HashChainFindCopy(hash_chain, i, xsize, argb, maxlen,
window_size, iter_pos, iter_limit,
&offset, &len);
}
if (len >= MIN_LENGTH) {
@ -291,8 +311,9 @@ static int BackwardReferencesHashChain(int xsize, int ysize,
if (maxlen > MAX_LENGTH) {
maxlen = MAX_LENGTH;
}
HashChainFindCopy(hash_chain, quality,
i + 1, xsize, argb, maxlen, &offset2, &len2);
HashChainFindCopy(hash_chain, i + 1, xsize, argb, maxlen,
window_size, iter_pos, iter_limit,
&offset2, &len2);
if (len2 > len + 1) {
const uint32_t pixel = argb[i];
// Alternative#2 is a better match. So push pixel at 'i' as literal.
@ -362,7 +383,8 @@ typedef struct {
static int BackwardReferencesTraceBackwards(
int xsize, int ysize, int recursive_cost_model,
const uint32_t* const argb, int cache_bits, VP8LBackwardRefs* const refs);
const uint32_t* const argb, int quality, int cache_bits,
VP8LBackwardRefs* const refs);
static void ConvertPopulationCountTableToBitEstimates(
int num_symbols, const int population_counts[], double output[]) {
@ -387,17 +409,16 @@ static void ConvertPopulationCountTableToBitEstimates(
static int CostModelBuild(CostModel* const m, int xsize, int ysize,
int recursion_level, const uint32_t* const argb,
int cache_bits) {
int quality, int cache_bits) {
int ok = 0;
VP8LHistogram histo;
VP8LBackwardRefs refs;
const int quality = 100;
if (!VP8LBackwardRefsAlloc(&refs, xsize * ysize)) goto Error;
if (recursion_level > 0) {
if (!BackwardReferencesTraceBackwards(xsize, ysize, recursion_level - 1,
argb, cache_bits, &refs)) {
argb, quality, cache_bits, &refs)) {
goto Error;
}
} else {
@ -452,11 +473,10 @@ static WEBP_INLINE double GetDistanceCost(const CostModel* const m,
static int BackwardReferencesHashChainDistanceOnly(
int xsize, int ysize, int recursive_cost_model, const uint32_t* const argb,
int cache_bits, uint32_t* const dist_array) {
int quality, int cache_bits, uint32_t* const dist_array) {
int i;
int ok = 0;
int cc_init = 0;
const int quality = 100;
const int pix_count = xsize * ysize;
const int use_color_cache = (cache_bits > 0);
float* const cost =
@ -466,6 +486,10 @@ static int BackwardReferencesHashChainDistanceOnly(
VP8LColorCache hashers;
const double mul0 = (recursive_cost_model != 0) ? 1.0 : 0.68;
const double mul1 = (recursive_cost_model != 0) ? 1.0 : 0.82;
const int min_distance_code = 2; // TODO(vikasa): tune as function of quality
int window_size = WINDOW_SIZE;
int iter_pos = 1;
int iter_limit = -1;
if (cost == NULL || cost_model == NULL || hash_chain == NULL) goto Error;
@ -477,7 +501,7 @@ static int BackwardReferencesHashChainDistanceOnly(
}
if (!CostModelBuild(cost_model, xsize, ysize, recursive_cost_model, argb,
cache_bits)) {
quality, cache_bits)) {
goto Error;
}
@ -486,6 +510,8 @@ static int BackwardReferencesHashChainDistanceOnly(
// We loop one pixel at a time, but store all currently best points to
// non-processed locations from this point.
dist_array[0] = 0;
GetParamsForHashChainFindCopy(quality, xsize, &window_size, &iter_pos,
&iter_limit);
for (i = 0; i < pix_count; ++i) {
double prev_cost = 0.0;
int shortmax;
@ -500,7 +526,8 @@ static int BackwardReferencesHashChainDistanceOnly(
if (maxlen > pix_count - i) {
maxlen = pix_count - i;
}
HashChainFindCopy(hash_chain, quality, i, xsize, argb, maxlen,
HashChainFindCopy(hash_chain, i, xsize, argb, maxlen,
window_size, iter_pos, iter_limit,
&offset, &len);
}
if (len >= MIN_LENGTH) {
@ -517,7 +544,7 @@ static int BackwardReferencesHashChainDistanceOnly(
}
// This if is for speedup only. It roughly doubles the speed, and
// makes compression worse by .1 %.
if (len >= 128 && code < 2) {
if (len >= 128 && code <= min_distance_code) {
// Long copy for short distances, let's skip the middle
// lookups for better copies.
// 1) insert the hashes.
@ -528,10 +555,10 @@ static int BackwardReferencesHashChainDistanceOnly(
}
// 2) Add to the hash_chain (but cannot add the last pixel)
{
const int last = (len < pix_count - 1 - i) ? len
: pix_count - 1 - i;
for (k = 0; k < last; ++k) {
HashChainInsert(hash_chain, &argb[i + k], i + k);
const int last = (len + i < pix_count - 1) ? len + i
: pix_count - 1;
for (k = i; k < last; ++k) {
HashChainInsert(hash_chain, &argb[k], k);
}
}
// 3) jump.
@ -571,40 +598,30 @@ Error:
return ok;
}
static int TraceBackwards(const uint32_t* const dist_array,
int dist_array_size,
uint32_t** const chosen_path,
int* const chosen_path_size) {
int i;
// Count how many.
int count = 0;
for (i = dist_array_size - 1; i >= 0; ) {
int k = dist_array[i];
assert(k >= 1);
++count;
i -= k;
}
// Allocate.
*chosen_path_size = count;
*chosen_path =
(uint32_t*)WebPSafeMalloc((uint64_t)count, sizeof(**chosen_path));
if (*chosen_path == NULL) return 0;
// Write in reverse order.
for (i = dist_array_size - 1; i >= 0; ) {
int k = dist_array[i];
assert(k >= 1);
(*chosen_path)[--count] = k;
i -= k;
}
return 1;
// We pack the path at the end of *dist_array and return
// a pointer to this part of the array. Example:
// dist_array = [1x2xx3x2] => packed [1x2x1232], chosen_path = [1232]
//
// The walk starts at the last entry; each visited entry 'k' is appended (in
// reverse) to the packed path and the walk then steps back by 'k' entries.
// Every visited entry must be >= 1, otherwise the walk would never terminate.
//   dist_array:       step lengths, overwritten in its tail by the packed path.
//   dist_array_size:  number of entries in 'dist_array' (may be 0).
//   chosen_path:      receives a pointer inside 'dist_array' (not a new
//                     allocation; it must not be freed separately).
//   chosen_path_size: receives the number of steps in the path.
static void TraceBackwards(uint32_t* const dist_array,
                           int dist_array_size,
                           uint32_t** const chosen_path,
                           int* const chosen_path_size) {
  // Indices are used instead of pointers so that stepping before the start
  // of the array never forms an out-of-bounds pointer.
  int path_start = dist_array_size;
  int cur = dist_array_size - 1;
  while (cur >= 0) {
    const int k = (int)dist_array[cur];
    assert(k >= 1);
    // path_start - 1 >= cur always holds, so only already-consumed entries
    // (or the one just read) get overwritten.
    dist_array[--path_start] = (uint32_t)k;
    cur -= k;
  }
  *chosen_path = dist_array + path_start;
  *chosen_path_size = dist_array_size - path_start;
}
static int BackwardReferencesHashChainFollowChosenPath(
int xsize, int ysize, const uint32_t* const argb, int cache_bits,
int xsize, int ysize, const uint32_t* const argb,
int quality, int cache_bits,
const uint32_t* const chosen_path, int chosen_path_size,
VP8LBackwardRefs* const refs) {
const int quality = 100;
const int pix_count = xsize * ysize;
const int use_color_cache = (cache_bits > 0);
int size = 0;
@ -613,6 +630,9 @@ static int BackwardReferencesHashChainFollowChosenPath(
int ix;
int ok = 0;
int cc_init = 0;
int window_size = WINDOW_SIZE;
int iter_pos = 1;
int iter_limit = -1;
HashChain* hash_chain = (HashChain*)malloc(sizeof(*hash_chain));
VP8LColorCache hashers;
@ -625,13 +645,16 @@ static int BackwardReferencesHashChainFollowChosenPath(
}
refs->size = 0;
GetParamsForHashChainFindCopy(quality, xsize, &window_size, &iter_pos,
&iter_limit);
for (ix = 0; ix < chosen_path_size; ++ix, ++size) {
int offset = 0;
int len = 0;
int maxlen = chosen_path[ix];
if (maxlen != 1) {
HashChainFindCopy(hash_chain, quality,
i, xsize, argb, maxlen, &offset, &len);
HashChainFindCopy(hash_chain, i, xsize, argb, maxlen,
window_size, iter_pos, iter_limit,
&offset, &len);
assert(len == maxlen);
refs->refs[size] = PixOrCopyCreateCopy(offset, len);
if (use_color_cache) {
@ -674,7 +697,7 @@ Error:
static int BackwardReferencesTraceBackwards(int xsize, int ysize,
int recursive_cost_model,
const uint32_t* const argb,
int cache_bits,
int quality, int cache_bits,
VP8LBackwardRefs* const refs) {
int ok = 0;
const int dist_array_size = xsize * ysize;
@ -686,22 +709,18 @@ static int BackwardReferencesTraceBackwards(int xsize, int ysize,
if (dist_array == NULL) goto Error;
if (!BackwardReferencesHashChainDistanceOnly(
xsize, ysize, recursive_cost_model, argb, cache_bits, dist_array)) {
goto Error;
}
if (!TraceBackwards(dist_array, dist_array_size,
&chosen_path, &chosen_path_size)) {
xsize, ysize, recursive_cost_model, argb, quality, cache_bits,
dist_array)) {
goto Error;
}
free(dist_array); // no need to retain this memory any longer
dist_array = NULL;
TraceBackwards(dist_array, dist_array_size, &chosen_path, &chosen_path_size);
if (!BackwardReferencesHashChainFollowChosenPath(
xsize, ysize, argb, cache_bits, chosen_path, chosen_path_size, refs)) {
xsize, ysize, argb, quality, cache_bits, chosen_path, chosen_path_size,
refs)) {
goto Error;
}
ok = 1;
Error:
free(chosen_path);
free(dist_array);
return ok;
}
@ -761,8 +780,8 @@ int VP8LGetBackwardReferences(int width, int height,
// Choose appropriate backward reference.
if (lz77_is_useful) {
// TraceBackwards is costly. Run it for higher qualities.
const int try_lz77_trace_backwards = (quality >= 75);
// TraceBackwards is costly. Don't execute it at lower quality (q <= 10).
const int try_lz77_trace_backwards = (quality > 10);
*best = refs_lz77; // default guess: lz77 is better
VP8LClearBackwardRefs(&refs_rle);
if (try_lz77_trace_backwards) {
@ -771,8 +790,8 @@ int VP8LGetBackwardReferences(int width, int height,
if (!VP8LBackwardRefsAlloc(&refs_trace, num_pix)) {
goto End;
}
if (BackwardReferencesTraceBackwards(
width, height, recursion_level, argb, cache_bits, &refs_trace)) {
if (BackwardReferencesTraceBackwards(width, height, recursion_level, argb,
quality, cache_bits, &refs_trace)) {
VP8LClearBackwardRefs(&refs_lz77);
*best = refs_trace;
}

@ -35,7 +35,8 @@ extern "C" {
#if defined(__GNUC__) && \
((__GNUC__ == 3 && __GNUC_MINOR__ >= 4) || __GNUC__ >= 4)
static WEBP_INLINE int BitsLog2Floor(uint32_t n) {
return n == 0 ? -1 : 31 ^ __builtin_clz(n);
assert(n != 0);
return 31 ^ __builtin_clz(n);
}
#elif defined(_MSC_VER) && (defined(_M_X64) || defined(_M_IX86))
#include <intrin.h>
@ -43,15 +44,18 @@ static WEBP_INLINE int BitsLog2Floor(uint32_t n) {
static WEBP_INLINE int BitsLog2Floor(uint32_t n) {
unsigned long first_set_bit;
return _BitScanReverse(&first_set_bit, n) ? first_set_bit : -1;
assert(n != 0);
_BitScanReverse(&first_set_bit, n);
return first_set_bit;
}
#else
// Returns (int)floor(log2(n)). n must be > 0.
static WEBP_INLINE int BitsLog2Floor(uint32_t n) {
int log = 0;
uint32_t value = n;
int i;
if (value == 0) return -1;
assert(n != 0);
for (i = 4; i >= 0; --i) {
const int shift = (1 << i);
const uint32_t x = value >> shift;
@ -65,11 +69,11 @@ static WEBP_INLINE int BitsLog2Floor(uint32_t n) {
#endif
static WEBP_INLINE int VP8LBitsLog2Ceiling(uint32_t n) {
const int floor = BitsLog2Floor(n);
const int log_floor = BitsLog2Floor(n);
if (n == (n & ~(n - 1))) // zero or a power of two.
return floor;
return log_floor;
else
return floor + 1;
return log_floor + 1;
}
// Splitting of distance and length codes into prefixes and
@ -78,16 +82,17 @@ static WEBP_INLINE int VP8LBitsLog2Ceiling(uint32_t n) {
static WEBP_INLINE void PrefixEncode(int distance, int* const code,
int* const extra_bits_count,
int* const extra_bits_value) {
// Collect the two most significant bits where the highest bit is 1.
const int highest_bit = BitsLog2Floor(--distance);
// & 0x3f is to make behavior well defined when highest_bit
// does not exist or is the least significant bit.
const int second_highest_bit =
(distance >> ((highest_bit - 1) & 0x3f)) & 1;
*extra_bits_count = (highest_bit > 0) ? (highest_bit - 1) : 0;
*extra_bits_value = distance & ((1 << *extra_bits_count) - 1);
*code = (highest_bit > 0) ? (2 * highest_bit + second_highest_bit)
: (highest_bit == 0) ? 1 : 0;
if (distance > 2) { // Collect the two most significant bits.
const int highest_bit = BitsLog2Floor(--distance);
const int second_highest_bit = (distance >> (highest_bit - 1)) & 1;
*extra_bits_count = highest_bit - 1;
*extra_bits_value = distance & ((1 << *extra_bits_count) - 1);
*code = 2 * highest_bit + second_highest_bit;
} else {
*extra_bits_count = 0;
*extra_bits_value = 0;
*code = (distance == 2) ? 1 : 0;
}
}
// -----------------------------------------------------------------------------

@ -31,9 +31,9 @@ int WebPConfigInitInternal(WebPConfig* config,
config->target_PSNR = 0.;
config->method = 4;
config->sns_strength = 50;
config->filter_strength = 20; // default: light filtering
config->filter_strength = 60; // rather high filtering, helps w/ gradients.
config->filter_sharpness = 0;
config->filter_type = 0; // default: simple
config->filter_type = 1; // default: strong (so U/V is filtered too)
config->partitions = 0;
config->segments = 4;
config->pass = 1;
@ -46,6 +46,9 @@ int WebPConfigInitInternal(WebPConfig* config,
config->alpha_quality = 100;
config->lossless = 0;
config->image_hint = WEBP_HINT_DEFAULT;
config->emulate_jpeg_size = 0;
config->thread_level = 0;
config->low_memory = 0;
// TODO(skal): tune.
switch (preset) {
@ -122,6 +125,12 @@ int WebPValidateConfig(const WebPConfig* config) {
return 0;
if (config->image_hint >= WEBP_HINT_LAST)
return 0;
if (config->emulate_jpeg_size < 0 || config->emulate_jpeg_size > 1)
return 0;
if (config->thread_level < 0 || config->thread_level > 1)
return 0;
if (config->low_memory < 0 || config->low_memory > 1)
return 0;
return 1;
}

@ -75,7 +75,7 @@ const uint16_t VP8LevelCodes[MAX_VARIABLE_LEVEL][2] = {
// fixed costs for coding levels, deduce from the coding tree.
// This is only the part that doesn't depend on the probability state.
const uint16_t VP8LevelFixedCosts[2048] = {
const uint16_t VP8LevelFixedCosts[MAX_LEVEL + 1] = {
0, 256, 256, 256, 256, 432, 618, 630,
731, 640, 640, 828, 901, 948, 1021, 1101,
1174, 1221, 1294, 1042, 1085, 1115, 1158, 1202,
@ -359,7 +359,7 @@ void VP8CalculateLevelCosts(VP8Proba* const proba) {
for (ctype = 0; ctype < NUM_TYPES; ++ctype) {
for (band = 0; band < NUM_BANDS; ++band) {
for(ctx = 0; ctx < NUM_CTX; ++ctx) {
for (ctx = 0; ctx < NUM_CTX; ++ctx) {
const uint8_t* const p = proba->coeffs_[ctype][band][ctx];
uint16_t* const table = proba->level_cost_[ctype][band][ctx];
const int cost_base = VP8BitCost(1, p[1]);

@ -18,7 +18,8 @@
extern "C" {
#endif
extern const uint16_t VP8LevelFixedCosts[2048]; // approximate cost per level
// approximate cost per level:
extern const uint16_t VP8LevelFixedCosts[MAX_LEVEL + 1];
extern const uint16_t VP8EntropyCost[256]; // 8bit fixed-point log(p)
// Cost of coding one event with probability 'proba'.

@ -45,10 +45,10 @@ const uint8_t VP8EncBands[16 + 1] = {
0 // sentinel
};
static const uint8_t kCat3[] = { 173, 148, 140 };
static const uint8_t kCat4[] = { 176, 155, 140, 135 };
static const uint8_t kCat5[] = { 180, 157, 141, 134, 130 };
static const uint8_t kCat6[] =
const uint8_t VP8Cat3[] = { 173, 148, 140 };
const uint8_t VP8Cat4[] = { 176, 155, 140, 135 };
const uint8_t VP8Cat5[] = { 180, 157, 141, 134, 130 };
const uint8_t VP8Cat6[] =
{ 254, 254, 243, 230, 196, 177, 153, 140, 133, 130, 129 };
//------------------------------------------------------------------------------
@ -113,14 +113,15 @@ static int Record(int bit, proba_t* const stats) {
// Note: no need to record the fixed probas.
static int RecordCoeffs(int ctx, const VP8Residual* const res) {
int n = res->first;
proba_t* s = res->stats[VP8EncBands[n]][ctx];
// should be stats[VP8EncBands[n]], but it's equivalent for n=0 or 1
proba_t* s = res->stats[n][ctx];
if (res->last < 0) {
Record(0, s + 0);
return 0;
}
while (n <= res->last) {
int v;
Record(1, s + 0);
Record(1, s + 0); // order of record doesn't matter
while ((v = res->coeffs[n++]) == 0) {
Record(0, s + 1);
s = res->stats[VP8EncBands[n]][0];
@ -174,8 +175,7 @@ static int BranchCost(int nb, int total, int proba) {
return nb * VP8BitCost(1, proba) + (total - nb) * VP8BitCost(0, proba);
}
static int FinalizeTokenProbas(VP8Encoder* const enc) {
VP8Proba* const proba = &enc->proba_;
static int FinalizeTokenProbas(VP8Proba* const proba) {
int has_changed = 0;
int size = 0;
int t, b, c, p;
@ -211,6 +211,47 @@ static int FinalizeTokenProbas(VP8Encoder* const enc) {
return size;
}
//------------------------------------------------------------------------------
// Finalize Segment probability based on the coding tree
// Returns the probability, scaled to [0..255] and rounded to nearest, of the
// event counted 'a' times against the one counted 'b' times. When no event
// was counted at all, the default probability 255 is returned.
static int GetProba(int a, int b) {
  const int total = a + b;
  if (total == 0) {
    return 255;  // that's the default probability.
  }
  return (255 * a + total / 2) / total;  // rounded proba
}
// Counts how many macroblocks fall in each segment, copies these counts into
// the picture stats (when present) and, if more than one segment is in use,
// derives the three segment-tree probabilities, the 'update_map_' flag and
// the bit-cost estimate of the segment map. With a single segment, the map
// is neither updated nor costed.
static void SetSegmentProbas(VP8Encoder* const enc) {
  const int num_mbs = enc->mb_w_ * enc->mb_h_;
  int p[NUM_MB_SEGMENTS] = { 0 };
  int n;

  for (n = 0; n < num_mbs; ++n) {
    ++p[enc->mb_info_[n].segment_];
  }
  if (enc->pic_->stats != NULL) {
    for (n = 0; n < NUM_MB_SEGMENTS; ++n) {
      enc->pic_->stats->segment_size[n] = p[n];
    }
  }
  if (enc->segment_hdr_.num_segments_ <= 1) {
    enc->segment_hdr_.update_map_ = 0;
    enc->segment_hdr_.size_ = 0;
    return;
  }
  {
    uint8_t* const probas = enc->proba_.segments_;
    // probas[0] splits segments {0,1} from {2,3}; probas[1] and probas[2]
    // then split each pair.
    probas[0] = GetProba(p[0] + p[1], p[2] + p[3]);
    probas[1] = GetProba(p[0], p[1]);
    probas[2] = GetProba(p[2], p[3]);

    // The map only needs updating if some probability is not the default.
    enc->segment_hdr_.update_map_ =
        !(probas[0] == 255 && probas[1] == 255 && probas[2] == 255);
    // Two bits are spent per macroblock: one per level of the tree.
    enc->segment_hdr_.size_ =
        p[0] * (VP8BitCost(0, probas[0]) + VP8BitCost(0, probas[1])) +
        p[1] * (VP8BitCost(0, probas[0]) + VP8BitCost(1, probas[1])) +
        p[2] * (VP8BitCost(1, probas[0]) + VP8BitCost(0, probas[2])) +
        p[3] * (VP8BitCost(1, probas[0]) + VP8BitCost(1, probas[2]));
  }
}
//------------------------------------------------------------------------------
// helper functions for residuals struct VP8Residual.
@ -239,18 +280,19 @@ static void SetResidualCoeffs(const int16_t* const coeffs,
//------------------------------------------------------------------------------
// Mode costs
static int GetResidualCost(int ctx, const VP8Residual* const res) {
static int GetResidualCost(int ctx0, const VP8Residual* const res) {
int n = res->first;
int p0 = res->prob[VP8EncBands[n]][ctx][0];
const uint16_t* t = res->cost[VP8EncBands[n]][ctx];
// should be prob[VP8EncBands[n]], but it's equivalent for n=0 or 1
int p0 = res->prob[n][ctx0][0];
const uint16_t* t = res->cost[n][ctx0];
int cost;
if (res->last < 0) {
return VP8BitCost(0, p0);
}
cost = 0;
while (n <= res->last) {
const int v = res->coeffs[n];
while (n < res->last) {
int v = res->coeffs[n];
const int b = VP8EncBands[n + 1];
++n;
if (v == 0) {
@ -259,19 +301,28 @@ static int GetResidualCost(int ctx, const VP8Residual* const res) {
t = res->cost[b][0];
continue;
}
v = abs(v);
cost += VP8BitCost(1, p0);
if (2u >= (unsigned int)(v + 1)) { // v = -1 or 1
// short-case for "VP8LevelCost(t, 1)" (256 is VP8LevelFixedCosts[1]):
cost += 256 + t[1];
p0 = res->prob[b][1][0];
t = res->cost[b][1];
} else {
cost += VP8LevelCost(t, abs(v));
p0 = res->prob[b][2][0];
t = res->cost[b][2];
cost += VP8LevelCost(t, v);
{
const int ctx = (v == 1) ? 1 : 2;
p0 = res->prob[b][ctx][0];
t = res->cost[b][ctx];
}
}
// Last coefficient is always non-zero
{
const int v = abs(res->coeffs[n]);
assert(v != 0);
cost += VP8BitCost(1, p0);
cost += VP8LevelCost(t, v);
if (n < 15) {
const int b = VP8EncBands[n + 1];
const int ctx = (v == 1) ? 1 : 2;
const int last_p0 = res->prob[b][ctx][0];
cost += VP8BitCost(0, last_p0);
}
}
if (n < 16) cost += VP8BitCost(0, p0);
return cost;
}
@ -342,7 +393,8 @@ int VP8GetCostUV(VP8EncIterator* const it, const VP8ModeScore* const rd) {
static int PutCoeffs(VP8BitWriter* const bw, int ctx, const VP8Residual* res) {
int n = res->first;
const uint8_t* p = res->prob[VP8EncBands[n]][ctx];
// should be prob[VP8EncBands[n]], but it's equivalent for n=0 or 1
const uint8_t* p = res->prob[n][ctx];
if (!VP8PutBit(bw, res->last >= 0, p[0])) {
return 0;
}
@ -371,30 +423,30 @@ static int PutCoeffs(VP8BitWriter* const bw, int ctx, const VP8Residual* res) {
} else {
int mask;
const uint8_t* tab;
if (v < 3 + (8 << 1)) { // kCat3 (3b)
if (v < 3 + (8 << 1)) { // VP8Cat3 (3b)
VP8PutBit(bw, 0, p[8]);
VP8PutBit(bw, 0, p[9]);
v -= 3 + (8 << 0);
mask = 1 << 2;
tab = kCat3;
} else if (v < 3 + (8 << 2)) { // kCat4 (4b)
tab = VP8Cat3;
} else if (v < 3 + (8 << 2)) { // VP8Cat4 (4b)
VP8PutBit(bw, 0, p[8]);
VP8PutBit(bw, 1, p[9]);
v -= 3 + (8 << 1);
mask = 1 << 3;
tab = kCat4;
} else if (v < 3 + (8 << 3)) { // kCat5 (5b)
tab = VP8Cat4;
} else if (v < 3 + (8 << 3)) { // VP8Cat5 (5b)
VP8PutBit(bw, 1, p[8]);
VP8PutBit(bw, 0, p[10]);
v -= 3 + (8 << 2);
mask = 1 << 4;
tab = kCat5;
} else { // kCat6 (11b)
tab = VP8Cat5;
} else { // VP8Cat6 (11b)
VP8PutBit(bw, 1, p[8]);
VP8PutBit(bw, 1, p[10]);
v -= 3 + (8 << 3);
mask = 1 << 10;
tab = kCat6;
tab = VP8Cat6;
}
while (mask) {
VP8PutBit(bw, !!(v & mask), *tab++);
@ -411,8 +463,7 @@ static int PutCoeffs(VP8BitWriter* const bw, int ctx, const VP8Residual* res) {
return 1;
}
static void CodeResiduals(VP8BitWriter* const bw,
VP8EncIterator* const it,
static void CodeResiduals(VP8BitWriter* const bw, VP8EncIterator* const it,
const VP8ModeScore* const rd) {
int x, y, ch;
VP8Residual res;
@ -512,146 +563,23 @@ static void RecordResiduals(VP8EncIterator* const it,
//------------------------------------------------------------------------------
// Token buffer
#ifdef USE_TOKEN_BUFFER
// Puts the token buffer 'b' in its empty state: no page, no room left, no
// error, and 'last_' pointing at the head link so that the first new page
// gets attached to 'rows_'.
void VP8TBufferInit(VP8TBuffer* const b) {
  b->error_ = 0;
  b->left_ = 0;
  b->tokens_ = NULL;
  b->rows_ = NULL;
  b->last_ = &b->rows_;
}
// Allocates a new page of tokens and appends it to the page list of 'b'.
// Returns 1 on success. Returns 0 and sets b->error_ if the buffer was
// already in error or if the allocation failed; in that case the list is
// left untouched.
int VP8TBufferNewPage(VP8TBuffer* const b) {
  VP8Tokens* const page = b->error_ ? NULL : (VP8Tokens*)malloc(sizeof(*page));
  if (page == NULL) {
    b->error_ = 1;
    return 0;
  }
  // malloc() returns uninitialized memory: terminate the list explicitly,
  // since the list walkers stop on 'next_ == NULL'.
  page->next_ = NULL;
  *b->last_ = page;          // link the page after the current tail
  b->last_ = &page->next_;   // the next page will be linked after this one
  b->left_ = MAX_NUM_TOKEN;  // the whole page is available
  b->tokens_ = page->tokens_;
  return 1;
}
// Frees all the pages owned by 'b' and re-initializes it to the empty state.
// Passing NULL is a no-op.
void VP8TBufferClear(VP8TBuffer* const b) {
  const VP8Tokens* page;
  if (b == NULL) return;
  for (page = b->rows_; page != NULL; ) {
    const VP8Tokens* const next = page->next_;  // read before freeing
    free((void*)page);
    page = next;
  }
  VP8TBufferInit(b);
}
// Writes all the tokens stored in 'b' to the bit-writer 'bw'. For each
// token, bit 15 holds the bit value to code and the lower 15 bits index the
// probability to use in 'probas'. Within a page, tokens are read from the
// highest index downward; on the last page the walk stops at b->left_.
// Returns 0 if the buffer is in error state, 1 otherwise.
int VP8EmitTokens(const VP8TBuffer* const b, VP8BitWriter* const bw,
                  const uint8_t* const probas) {
  VP8Tokens* page;
  if (b->error_) return 0;
  for (page = b->rows_; page != NULL; page = page->next_) {
    // Only the last page can be partially filled.
    const int first_used = (page->next_ == NULL) ? b->left_ : 0;
    int n;
    for (n = MAX_NUM_TOKEN - 1; n >= first_used; --n) {
      VP8PutBit(bw, (page->tokens_[n] >> 15) & 1,
                probas[page->tokens_[n] & 0x7fff]);
    }
  }
  return 1;
}
#if !defined(DISABLE_TOKEN_BUFFER)
#define TOKEN_ID(b, ctx, p) ((p) + NUM_PROBAS * ((ctx) + (b) * NUM_CTX))
// Records into 'tokens' the sequence of binary decisions describing the
// coefficients of 'res', starting at index res->first. Each decision is
// stored with VP8AddToken() under the id TOKEN_ID(band, ctx, 0) + proba index.
// The branching relies on VP8AddToken()'s return value, which presumably is
// the bit that was just recorded -- TODO confirm against its definition.
// Returns 0 when the very first token ('res->last >= 0') comes back as zero,
// and 1 in all other cases.
// NOTE(review): the tokens for the extra bits of the large categories and
// for the sign are commented out below, so they are not recorded here.
static int RecordCoeffTokens(int ctx, const VP8Residual* const res,
VP8TBuffer* tokens) {
int n = res->first;
int b = VP8EncBands[n];
// First decision: is there any coefficient to code at all?
if (!VP8AddToken(tokens, res->last >= 0, TOKEN_ID(b, ctx, 0))) {
return 0;
}
while (n < 16) {
const int c = res->coeffs[n++];
const int sign = c < 0;
int v = sign ? -c : c;
// The token ids of the current coefficient are relative to the band/context
// in effect before they get updated below.
const int base_id = TOKEN_ID(b, ctx, 0);
// Zero coefficient: move on to the next one with context 0.
if (!VP8AddToken(tokens, v != 0, base_id + 1)) {
b = VP8EncBands[n];
ctx = 0;
continue;
}
// Magnitude 1: the next coefficient uses context 1.
if (!VP8AddToken(tokens, v > 1, base_id + 2)) {
b = VP8EncBands[n];
ctx = 1;
} else {
// Magnitude >= 2: walk the rest of the decision tree (2..4, 5..10, larger).
if (!VP8AddToken(tokens, v > 4, base_id + 3)) {
if (VP8AddToken(tokens, v != 2, base_id + 4))
VP8AddToken(tokens, v == 4, base_id + 5);
} else if (!VP8AddToken(tokens, v > 10, base_id + 6)) {
if (!VP8AddToken(tokens, v > 6, base_id + 7)) {
// VP8AddToken(tokens, v == 6, 159);
} else {
// VP8AddToken(tokens, v >= 9, 165);
// VP8AddToken(tokens, !(v & 1), 145);
}
} else {
// Large values: two tokens select the category; 'mask' and 'tab' are set up
// for the extra bits, but 'tab' is only referenced by the disabled code.
int mask;
const uint8_t* tab;
if (v < 3 + (8 << 1)) { // kCat3 (3b)
VP8AddToken(tokens, 0, base_id + 8);
VP8AddToken(tokens, 0, base_id + 9);
v -= 3 + (8 << 0);
mask = 1 << 2;
tab = kCat3;
} else if (v < 3 + (8 << 2)) { // kCat4 (4b)
VP8AddToken(tokens, 0, base_id + 8);
VP8AddToken(tokens, 1, base_id + 9);
v -= 3 + (8 << 1);
mask = 1 << 3;
tab = kCat4;
} else if (v < 3 + (8 << 3)) { // kCat5 (5b)
VP8AddToken(tokens, 1, base_id + 8);
VP8AddToken(tokens, 0, base_id + 10);
v -= 3 + (8 << 2);
mask = 1 << 4;
tab = kCat5;
} else { // kCat6 (11b)
VP8AddToken(tokens, 1, base_id + 8);
VP8AddToken(tokens, 1, base_id + 10);
v -= 3 + (8 << 3);
mask = 1 << 10;
tab = kCat6;
}
// Extra bits: the loop only iterates over the mask, nothing is recorded.
while (mask) {
// VP8AddToken(tokens, !!(v & mask), *tab++);
mask >>= 1;
}
}
ctx = 2;
}
b = VP8EncBands[n];
// VP8PutBitUniform(bw, sign);
// Stop after the 16th coefficient, or when the 'more coefficients' token
// (n <= res->last) comes back as zero.
if (n == 16 || !VP8AddToken(tokens, n <= res->last, TOKEN_ID(b, ctx, 0))) {
return 1; // EOB
}
}
return 1;
}
static void RecordTokens(VP8EncIterator* const it,
const VP8ModeScore* const rd, VP8TBuffer tokens[2]) {
static void RecordTokens(VP8EncIterator* const it, const VP8ModeScore* const rd,
VP8TBuffer* const tokens) {
int x, y, ch;
VP8Residual res;
VP8Encoder* const enc = it->enc_;
VP8IteratorNzToBytes(it);
if (it->mb_->type_ == 1) { // i16x16
const int ctx = it->top_nz_[8] + it->left_nz_[8];
InitResidual(0, 1, enc, &res);
SetResidualCoeffs(rd->y_dc_levels, &res);
// TODO(skal): FIX -> it->top_nz_[8] = it->left_nz_[8] =
RecordCoeffTokens(it->top_nz_[8] + it->left_nz_[8], &res, &tokens[0]);
it->top_nz_[8] = it->left_nz_[8] =
VP8RecordCoeffTokens(ctx, 1,
res.first, res.last, res.coeffs, tokens);
RecordCoeffs(ctx, &res);
InitResidual(1, 0, enc, &res);
} else {
InitResidual(0, 3, enc, &res);
@ -663,7 +591,9 @@ static void RecordTokens(VP8EncIterator* const it,
const int ctx = it->top_nz_[x] + it->left_nz_[y];
SetResidualCoeffs(rd->y_ac_levels[x + y * 4], &res);
it->top_nz_[x] = it->left_nz_[y] =
RecordCoeffTokens(ctx, &res, &tokens[0]);
VP8RecordCoeffTokens(ctx, res.coeff_type,
res.first, res.last, res.coeffs, tokens);
RecordCoeffs(ctx, &res);
}
}
@ -675,13 +605,16 @@ static void RecordTokens(VP8EncIterator* const it,
const int ctx = it->top_nz_[4 + ch + x] + it->left_nz_[4 + ch + y];
SetResidualCoeffs(rd->uv_levels[ch * 2 + x + y * 2], &res);
it->top_nz_[4 + ch + x] = it->left_nz_[4 + ch + y] =
RecordCoeffTokens(ctx, &res, &tokens[1]);
VP8RecordCoeffTokens(ctx, 2,
res.first, res.last, res.coeffs, tokens);
RecordCoeffs(ctx, &res);
}
}
}
VP8IteratorBytesToNz(it);
}
#endif // USE_TOKEN_BUFFER
#endif // !DISABLE_TOKEN_BUFFER
//------------------------------------------------------------------------------
// ExtraInfo map / Debug function
@ -697,7 +630,10 @@ static void SetBlock(uint8_t* p, int value, int size) {
#endif
static void ResetSSE(VP8Encoder* const enc) {
memset(enc->sse_, 0, sizeof(enc->sse_));
enc->sse_[0] = 0;
enc->sse_[1] = 0;
enc->sse_[2] = 0;
// Note: enc->sse_[3] is managed by alpha.c
enc->sse_count_ = 0;
}
@ -736,6 +672,7 @@ static void StoreSideInfo(const VP8EncIterator* const it) {
const int b = (int)((it->luma_bits_ + it->uv_bits_ + 7) >> 3);
*info = (b > 255) ? 255 : b; break;
}
case 7: *info = mb->alpha_; break;
default: *info = 0; break;
};
}
@ -747,99 +684,13 @@ static void StoreSideInfo(const VP8EncIterator* const it) {
}
//------------------------------------------------------------------------------
// Main loops
//
// VP8EncLoop(): does the final bitstream coding.
// Resets the non-zero context of the current macroblock after it was skipped.
// For an i16x16 macroblock (type_ == 1) every flag is cleared, including the
// left DC context; otherwise only bit 24 (the dc_nz bit) is kept.
static void ResetAfterSkip(VP8EncIterator* const it) {
  const int is_i16 = (it->mb_->type_ == 1);
  if (!is_i16) {
    *it->nz_ &= (1 << 24);  // keep only the dc_nz bit
    return;
  }
  *it->nz_ = 0;             // wipe every predictor flag
  it->left_nz_[8] = 0;
}
// Final coding pass. Sets up one bit-writer per partition, then iterates over
// the macroblocks with VP8IteratorNext(): each one is imported, quantized by
// VP8Decimate() and either has its residuals coded or its predictors reset
// (skip case). Returns non-zero on success. On failure the bit-writers are
// released with VP8EncFreeBitWriters() and 0 is returned.
int VP8EncLoop(VP8Encoder* const enc) {
int i, s, p;
int ok = 1;
VP8EncIterator it;
VP8ModeScore info;
const int dont_use_skip = !enc->proba_.use_skip_proba_;
const int rd_opt = enc->rd_opt_level_;
const int kAverageBytesPerMB = 5; // TODO: have a kTable[quality/10]
// Initial size estimate handed to each partition's bit-writer.
const int bytes_per_parts =
enc->mb_w_ * enc->mb_h_ * kAverageBytesPerMB / enc->num_parts_;
// Initialize the bit-writers
// NOTE(review): the result of VP8BitWriterInit() is not checked here.
for (p = 0; p < enc->num_parts_; ++p) {
VP8BitWriterInit(enc->parts_ + p, bytes_per_parts);
}
ResetStats(enc);
ResetSSE(enc);
VP8IteratorInit(enc, &it);
VP8InitFilter(&it);
do {
VP8IteratorImport(&it);
// Warning! order is important: first call VP8Decimate() and
// *then* decide how to code the skip decision if there's one.
if (!VP8Decimate(&it, &info, rd_opt) || dont_use_skip) {
CodeResiduals(it.bw_, &it, &info);
} else { // reset predictors after a skip
ResetAfterSkip(&it);
}
#ifdef WEBP_EXPERIMENTAL_FEATURES
if (enc->use_layer_) {
VP8EncCodeLayerBlock(&it);
}
#endif
StoreSideInfo(&it);
VP8StoreFilterStats(&it);
VP8IteratorExport(&it);
// A zero result from the progress call ends the loop early.
ok = VP8IteratorProgress(&it, 20);
} while (ok && VP8IteratorNext(&it, it.yuv_out_));
if (ok) { // Finalize the partitions, check for extra errors.
for (p = 0; p < enc->num_parts_; ++p) {
VP8BitWriterFinish(enc->parts_ + p);
ok &= !enc->parts_[p].error_;
}
}
if (ok) { // All good. Finish up.
if (enc->pic_->stats) { // finalize byte counters...
for (i = 0; i <= 2; ++i) {
for (s = 0; s < NUM_MB_SEGMENTS; ++s) {
// bit count -> byte count, rounded up
enc->residual_bytes_[i][s] = (int)((it.bit_count_[s][i] + 7) >> 3);
}
}
}
VP8AdjustFilterStrength(&it); // ...and store filter stats.
} else {
// Something bad happened -> need to do some memory cleanup.
VP8EncFreeBitWriters(enc);
}
return ok;
}
//------------------------------------------------------------------------------
// VP8StatLoop(): only collect statistics (number of skips, token usage, ...)
// This is used for deciding optimal probabilities. It also
// modifies the quantizer value if some target (size, PSNR)
// was specified.
// StatLoop(): only collect statistics (number of skips, token usage, ...).
// This is used for deciding optimal probabilities. It also modifies the
// quantizer value if some target (size, PSNR) was specified.
#define kHeaderSizeEstimate (15 + 20 + 10) // TODO: fix better
static int OneStatPass(VP8Encoder* const enc, float q, int rd_opt, int nb_mbs,
float* const PSNR, int percent_delta) {
VP8EncIterator it;
uint64_t size = 0;
uint64_t distortion = 0;
const uint64_t pixel_count = nb_mbs * 384;
static void SetLoopParams(VP8Encoder* const enc, float q) {
// Make sure the quality parameter is inside valid bounds
if (q < 0.) {
q = 0;
@ -848,10 +699,23 @@ static int OneStatPass(VP8Encoder* const enc, float q, int rd_opt, int nb_mbs,
}
VP8SetSegmentParams(enc, q); // setup segment quantizations and filters
SetSegmentProbas(enc); // compute segment probabilities
ResetStats(enc);
ResetTokenStats(enc);
ResetSSE(enc);
}
static int OneStatPass(VP8Encoder* const enc, float q, VP8RDLevel rd_opt,
int nb_mbs, float* const PSNR, int percent_delta) {
VP8EncIterator it;
uint64_t size = 0;
uint64_t distortion = 0;
const uint64_t pixel_count = nb_mbs * 384;
SetLoopParams(enc, q);
VP8IteratorInit(enc, &it);
do {
VP8ModeScore info;
@ -867,7 +731,7 @@ static int OneStatPass(VP8Encoder* const enc, float q, int rd_opt, int nb_mbs,
return 0;
} while (VP8IteratorNext(&it, it.yuv_out_) && --nb_mbs > 0);
size += FinalizeSkipProba(enc);
size += FinalizeTokenProbas(enc);
size += FinalizeTokenProbas(&enc->proba_);
size += enc->segment_hdr_.size_;
size = ((size + 1024) >> 11) + kHeaderSizeEstimate;
@ -880,10 +744,10 @@ static int OneStatPass(VP8Encoder* const enc, float q, int rd_opt, int nb_mbs,
// successive refinement increments.
static const int dqs[] = { 20, 15, 10, 8, 6, 4, 2, 1, 0 };
int VP8StatLoop(VP8Encoder* const enc) {
const int do_search =
(enc->config_->target_size > 0 || enc->config_->target_PSNR > 0);
const int fast_probe = (enc->method_ < 2 && !do_search);
static int StatLoop(VP8Encoder* const enc) {
const int method = enc->method_;
const int do_search = enc->do_search_;
const int fast_probe = ((method == 0 || method == 3) && !do_search);
float q = enc->config_->quality;
const int max_passes = enc->config_->pass;
const int task_percent = 20;
@ -894,12 +758,18 @@ int VP8StatLoop(VP8Encoder* const enc) {
// Fast mode: quick analysis pass over few mbs. Better than nothing.
nb_mbs = enc->mb_w_ * enc->mb_h_;
if (fast_probe && nb_mbs > 100) nb_mbs = 100;
if (fast_probe) {
if (method == 3) { // we need more stats for method 3 to be reliable.
nb_mbs = (nb_mbs > 200) ? nb_mbs >> 1 : 100;
} else {
nb_mbs = (nb_mbs > 200) ? nb_mbs >> 2 : 50;
}
}
// No target size: just do several pass without changing 'q'
if (!do_search) {
for (pass = 0; pass < max_passes; ++pass) {
const int rd_opt = (enc->method_ > 2);
const VP8RDLevel rd_opt = (method >= 3) ? RD_OPT_BASIC : RD_OPT_NONE;
if (!OneStatPass(enc, q, rd_opt, nb_mbs, NULL, percent_per_pass)) {
return 0;
}
@ -907,15 +777,14 @@ int VP8StatLoop(VP8Encoder* const enc) {
} else {
// binary search for a size close to target
for (pass = 0; pass < max_passes && (dqs[pass] > 0); ++pass) {
const int rd_opt = 1;
float PSNR;
int criterion;
const int size = OneStatPass(enc, q, rd_opt, nb_mbs, &PSNR,
const int size = OneStatPass(enc, q, RD_OPT_BASIC, nb_mbs, &PSNR,
percent_per_pass);
#if DEBUG_SEARCH
printf("#%d size=%d PSNR=%.2f q=%.2f\n", pass, size, PSNR, q);
#endif
if (!size) return 0;
if (size == 0) return 0;
if (enc->config_->target_PSNR > 0) {
criterion = (PSNR < enc->config_->target_PSNR);
} else {
@ -929,9 +798,178 @@ int VP8StatLoop(VP8Encoder* const enc) {
}
}
}
VP8CalculateLevelCosts(&enc->proba_); // finalize costs
return WebPReportProgress(enc->pic_, final_percent, &enc->percent_);
}
//------------------------------------------------------------------------------
// Main loops
//
static const int kAverageBytesPerMB[8] = { 50, 24, 16, 9, 7, 5, 3, 2 };
// Sets up one bit-writer per partition before a coding loop starts.
// The initial buffer size is derived from the macroblock count and a
// per-macroblock byte estimate looked up with (base_quant_ >> 4).
// Returns the (non-zero) status on success. If a bit-writer fails to
// initialize, all of them are released and 0 is returned.
static int PreLoopInitialize(VP8Encoder* const enc) {
  const int bytes_per_mb = kAverageBytesPerMB[enc->base_quant_ >> 4];
  const int bytes_per_parts =
      enc->mb_w_ * enc->mb_h_ * bytes_per_mb / enc->num_parts_;
  int status = 1;
  int part = 0;
  // Initialize the bit-writers, stopping at the first failure.
  while (status && part < enc->num_parts_) {
    status = VP8BitWriterInit(enc->parts_ + part, bytes_per_parts);
    ++part;
  }
  if (status) return status;
  VP8EncFreeBitWriters(enc);   // malloc error occurred
  return status;
}
// Common epilogue of the coding loops. When 'ok' is set, every partition's
// bit-writer is finished and its error flag folded into 'ok'. On success the
// per-segment residual byte counters are filled in (only if the picture
// carries a stats structure) and the filter strength is adjusted. On failure
// the bit-writers are released. Returns the final status.
static int PostLoopFinalize(VP8EncIterator* const it, int ok) {
  VP8Encoder* const enc = it->enc_;
  if (ok) {
    // Finalize the partitions, check for extra errors.
    int part;
    for (part = 0; part < enc->num_parts_; ++part) {
      VP8BitWriterFinish(enc->parts_ + part);
      ok &= !enc->parts_[part].error_;
    }
  }
  if (!ok) {
    // Something bad happened -> need to do some memory cleanup.
    VP8EncFreeBitWriters(enc);
    return ok;
  }
  // All good. Finish up.
  if (enc->pic_->stats) {
    // Convert the accumulated bit counts to bytes, rounding up.
    int seg;
    for (seg = 0; seg < NUM_MB_SEGMENTS; ++seg) {
      int type;
      for (type = 0; type <= 2; ++type) {
        enc->residual_bytes_[type][seg] =
            (int)((it->bit_count_[seg][type] + 7) >> 3);
      }
    }
  }
  VP8AdjustFilterStrength(it);   // store filter stats
  return ok;
}
//------------------------------------------------------------------------------
// VP8EncLoop(): does the final bitstream coding.
// Resets the non-zero context of the current macroblock after it was skipped.
// For an i16x16 macroblock (type_ == 1) every flag is cleared, including the
// left DC context; otherwise only bit 24 (the dc_nz bit) is kept.
static void ResetAfterSkip(VP8EncIterator* const it) {
  const int is_i16 = (it->mb_->type_ == 1);
  if (!is_i16) {
    *it->nz_ &= (1 << 24);  // keep only the dc_nz bit
    return;
  }
  *it->nz_ = 0;             // wipe every predictor flag
  it->left_nz_[8] = 0;
}
// Final coding pass. Initializes the bit-writers, runs the stats-collection
// loop, then iterates over the macroblocks with VP8IteratorNext(): each one is
// imported, quantized by VP8Decimate() and either has its residuals coded or
// its predictors reset (skip case). Returns 0 if the bit-writers could not be
// initialized, otherwise the status produced by PostLoopFinalize().
int VP8EncLoop(VP8Encoder* const enc) {
VP8EncIterator it;
int ok = PreLoopInitialize(enc);
if (!ok) return 0;
// NOTE(review): StatLoop() returns an int status that is dropped here --
// confirm that ignoring a failure of the stats pass is intentional.
StatLoop(enc); // stats-collection loop
VP8IteratorInit(enc, &it);
VP8InitFilter(&it);
do {
VP8ModeScore info;
const int dont_use_skip = !enc->proba_.use_skip_proba_;
const VP8RDLevel rd_opt = enc->rd_opt_level_;
VP8IteratorImport(&it);
// Warning! order is important: first call VP8Decimate() and
// *then* decide how to code the skip decision if there's one.
if (!VP8Decimate(&it, &info, rd_opt) || dont_use_skip) {
CodeResiduals(it.bw_, &it, &info);
} else { // reset predictors after a skip
ResetAfterSkip(&it);
}
#ifdef WEBP_EXPERIMENTAL_FEATURES
if (enc->use_layer_) {
VP8EncCodeLayerBlock(&it);
}
#endif
StoreSideInfo(&it);
VP8StoreFilterStats(&it);
VP8IteratorExport(&it);
// A zero result from the progress call ends the loop early.
ok = VP8IteratorProgress(&it, 20);
} while (ok && VP8IteratorNext(&it, it.yuv_out_));
// Finishes the partitions on success, frees the bit-writers on failure.
return PostLoopFinalize(&it, ok);
}
//------------------------------------------------------------------------------
// Single pass using Token Buffer.
#if !defined(DISABLE_TOKEN_BUFFER)
#define MIN_COUNT 96 // minimum number of macroblocks before updating stats
// Single-pass coding loop using the token buffer: each macroblock is
// quantized with VP8Decimate() and its coefficients are recorded as tokens
// instead of being written directly. The token probabilities and level costs
// are refreshed every 'max_count' macroblocks, and the tokens are emitted into
// partition 0 once the loop is done. Returns 0 if the bit-writers could not be
// initialized, otherwise the status produced by PostLoopFinalize().
int VP8EncTokenLoop(VP8Encoder* const enc) {
int ok;
// Roughly refresh the proba eight times per pass
int max_count = (enc->mb_w_ * enc->mb_h_) >> 3;
int cnt;
VP8EncIterator it;
VP8Proba* const proba = &enc->proba_;
const VP8RDLevel rd_opt = enc->rd_opt_level_;
// Never refresh more often than every MIN_COUNT macroblocks.
if (max_count < MIN_COUNT) max_count = MIN_COUNT;
cnt = max_count;
// Preconditions of this code path.
assert(enc->num_parts_ == 1);
assert(enc->use_tokens_);
assert(proba->use_skip_proba_ == 0);
assert(rd_opt >= RD_OPT_BASIC); // otherwise, token-buffer won't be useful
assert(!enc->do_search_); // TODO(skal): handle pass and dichotomy
SetLoopParams(enc, enc->config_->quality);
ok = PreLoopInitialize(enc);
if (!ok) return 0;
VP8IteratorInit(enc, &it);
VP8InitFilter(&it);
do {
VP8ModeScore info;
VP8IteratorImport(&it);
// Countdown expired: update probabilities and costs, then restart it.
if (--cnt < 0) {
FinalizeTokenProbas(proba);
VP8CalculateLevelCosts(proba); // refresh cost tables for rd-opt
cnt = max_count;
}
VP8Decimate(&it, &info, rd_opt);
RecordTokens(&it, &info, &enc->tokens_);
#ifdef WEBP_EXPERIMENTAL_FEATURES
if (enc->use_layer_) {
VP8EncCodeLayerBlock(&it);
}
#endif
StoreSideInfo(&it);
VP8StoreFilterStats(&it);
VP8IteratorExport(&it);
// A zero result from the progress call ends the loop early.
ok = VP8IteratorProgress(&it, 20);
} while (ok && VP8IteratorNext(&it, it.yuv_out_));
ok = ok && WebPReportProgress(enc->pic_, enc->percent_ + 20, &enc->percent_);
if (ok) {
// Final probabilities are computed before the recorded tokens are emitted.
FinalizeTokenProbas(proba);
ok = VP8EmitTokens(&enc->tokens_, enc->parts_ + 0,
(const uint8_t*)proba->coeffs_, 1);
}
return PostLoopFinalize(&it, ok);
}
#else
// Stub used when the token buffer is compiled out (DISABLE_TOKEN_BUFFER):
// always reports failure.
int VP8EncTokenLoop(VP8Encoder* const enc) {
  const int not_supported = 0;   // we shouldn't be here.
  (void)enc;
  return not_supported;
}
#endif // DISABLE_TOKEN_BUFFER
//------------------------------------------------------------------------------
#if defined(__cplusplus) || defined(c_plusplus)

@ -98,8 +98,6 @@ void VP8LHistogramAddSinglePixOrCopy(VP8LHistogram* const histo,
}
}
static double BitsEntropy(const int* const array, int n) {
double retval = 0.;
int sum = 0;
@ -149,25 +147,6 @@ static double BitsEntropy(const int* const array, int n) {
}
}
// Sums the BitsEntropy() of the five symbol populations of 'p' and adds the
// extra-bits cost of the length and distance codes.
double VP8LHistogramEstimateBitsBulk(const VP8LHistogram* const p) {
  double bits = BitsEntropy(&p->literal_[0], VP8LHistogramNumCodes(p));
  int k;
  bits += BitsEntropy(&p->red_[0], 256);
  bits += BitsEntropy(&p->blue_[0], 256);
  bits += BitsEntropy(&p->alpha_[0], 256);
  bits += BitsEntropy(&p->distance_[0], NUM_DISTANCE_CODES);
  // Extra bits: the code at position k (k >= 4) is weighted by (k - 2) / 2.
  for (k = 4; k < NUM_LENGTH_CODES; ++k) {
    bits += ((k - 2) >> 1) * p->literal_[256 + k];
  }
  for (k = 4; k < NUM_DISTANCE_CODES; ++k) {
    bits += ((k - 2) >> 1) * p->distance_[k];
  }
  return bits;
}
// Returns the cost of encoding the rle-encoded entropy code.
// The constants in this function are experimental.
static double HuffmanCost(const int* const population, int length) {
@ -207,19 +186,150 @@ static double HuffmanCost(const int* const population, int length) {
return retval;
}
// Estimates the Huffman dictionary + other block overhead size.
static double HistogramEstimateBitsHeader(const VP8LHistogram* const p) {
return HuffmanCost(&p->alpha_[0], 256) +
HuffmanCost(&p->red_[0], 256) +
HuffmanCost(&p->literal_[0], VP8LHistogramNumCodes(p)) +
HuffmanCost(&p->blue_[0], 256) +
HuffmanCost(&p->distance_[0], NUM_DISTANCE_CODES);
// Cost of one symbol population: BitsEntropy() plus HuffmanCost() of the
// same 'length' entries.
static double PopulationCost(const int* const population, int length) {
  const double entropy_bits = BitsEntropy(population, length);
  const double huffman_bits = HuffmanCost(population, length);
  return entropy_bits + huffman_bits;
}
// Extra-bits cost of a population of prefix codes: the count stored at
// position k (for 4 <= k < length) is weighted by (k - 2) / 2. Entries below
// position 4 contribute nothing.
static double ExtraCost(const int* const population, int length) {
  double total = 0.;
  int k;
  for (k = 4; k < length; ++k) {
    total += ((k - 2) >> 1) * population[k];
  }
  return total;
}
// Estimates the Entropy + Huffman + other block overhead size cost.
double VP8LHistogramEstimateBits(const VP8LHistogram* const p) {
return HistogramEstimateBitsHeader(p) + VP8LHistogramEstimateBitsBulk(p);
return PopulationCost(p->literal_, VP8LHistogramNumCodes(p))
+ PopulationCost(p->red_, 256)
+ PopulationCost(p->blue_, 256)
+ PopulationCost(p->alpha_, 256)
+ PopulationCost(p->distance_, NUM_DISTANCE_CODES)
+ ExtraCost(p->literal_ + 256, NUM_LENGTH_CODES)
+ ExtraCost(p->distance_, NUM_DISTANCE_CODES);
}
// Sums the BitsEntropy() of the five symbol populations of 'p' and adds the
// ExtraCost() of the length codes (stored after the first 256 literal
// entries) and of the distance codes. HuffmanCost() is not included.
double VP8LHistogramEstimateBitsBulk(const VP8LHistogram* const p) {
  double bits = BitsEntropy(p->literal_, VP8LHistogramNumCodes(p));
  bits += BitsEntropy(p->red_, 256);
  bits += BitsEntropy(p->blue_, 256);
  bits += BitsEntropy(p->alpha_, 256);
  bits += BitsEntropy(p->distance_, NUM_DISTANCE_CODES);
  bits += ExtraCost(p->literal_ + 256, NUM_LENGTH_CODES);
  bits += ExtraCost(p->distance_, NUM_DISTANCE_CODES);
  return bits;
}
// -----------------------------------------------------------------------------
// Various histogram combine/cost-eval functions
// Accumulates every population count of 'in' into the matching entry of 'out'
// (literal, distance, red, blue and alpha).
static void HistogramAdd(const VP8LHistogram* const in,
                         VP8LHistogram* const out) {
  int k;
  for (k = 0; k < 256; ++k) {
    out->red_[k] += in->red_[k];
    out->blue_[k] += in->blue_[k];
    out->alpha_[k] += in->alpha_[k];
  }
  for (k = 0; k < NUM_DISTANCE_CODES; ++k) {
    out->distance_[k] += in->distance_[k];
  }
  for (k = 0; k < PIX_OR_COPY_CODES_MAX; ++k) {
    out->literal_[k] += in->literal_[k];
  }
}
// Builds out = a + b one population at a time while accumulating the cost
// C(a+b). With sum_cost = C(a) + C(b) taken from the inputs' bit_cost_, the
// running cost is compared against 'cost_threshold + sum_cost' after each
// population. As soon as it exceeds that bound the function bails out and
// returns the partial cost, leaving 'out' only partly filled.
// When all populations are processed, out->bit_cost_ is set to C(a+b) and the
// score C(a+b) - C(a) - C(b) is returned.
static double HistogramAddEval(const VP8LHistogram* const a,
                               const VP8LHistogram* const b,
                               VP8LHistogram* const out,
                               double cost_threshold) {
  double cost = 0;
  const double sum_cost = a->bit_cost_ + b->bit_cost_;
  int k;
  cost_threshold += sum_cost;

  // palette_code_bits_ is part of the cost evaluation for literal_.
  // TODO(skal): remove/simplify this palette_code_bits_?
  if (a->palette_code_bits_ > b->palette_code_bits_) {
    out->palette_code_bits_ = a->palette_code_bits_;
  } else {
    out->palette_code_bits_ = b->palette_code_bits_;
  }

  // Literal + length codes.
  for (k = 0; k < PIX_OR_COPY_CODES_MAX; ++k) {
    out->literal_[k] = a->literal_[k] + b->literal_[k];
  }
  cost += PopulationCost(out->literal_, VP8LHistogramNumCodes(out));
  cost += ExtraCost(out->literal_ + 256, NUM_LENGTH_CODES);
  if (cost > cost_threshold) return cost;

  // Red.
  for (k = 0; k < 256; ++k) {
    out->red_[k] = a->red_[k] + b->red_[k];
  }
  cost += PopulationCost(out->red_, 256);
  if (cost > cost_threshold) return cost;

  // Blue.
  for (k = 0; k < 256; ++k) {
    out->blue_[k] = a->blue_[k] + b->blue_[k];
  }
  cost += PopulationCost(out->blue_, 256);
  if (cost > cost_threshold) return cost;

  // Distance codes.
  for (k = 0; k < NUM_DISTANCE_CODES; ++k) {
    out->distance_[k] = a->distance_[k] + b->distance_[k];
  }
  cost += PopulationCost(out->distance_, NUM_DISTANCE_CODES);
  cost += ExtraCost(out->distance_, NUM_DISTANCE_CODES);
  if (cost > cost_threshold) return cost;

  // Alpha comes last: no bail-out test is needed after it.
  for (k = 0; k < 256; ++k) {
    out->alpha_[k] = a->alpha_[k] + b->alpha_[k];
  }
  cost += PopulationCost(out->alpha_, 256);

  out->bit_cost_ = cost;
  return cost - sum_cost;
}
// Cost-only variant of HistogramAddEval(): the merged populations go to a
// scratch array and are never stored. The value accumulated is
// C(a+b) - C(a); the term C(b) is left out since it is constant over all the
// evaluations. Returns early with the partial cost as soon as it exceeds
// 'cost_threshold'.
static double HistogramAddThresh(const VP8LHistogram* const a,
                                 const VP8LHistogram* const b,
                                 double cost_threshold) {
  int merged[PIX_OR_COPY_CODES_MAX];  // <= max storage we'll need
  int k;
  double cost = -a->bit_cost_;

  // note that the tests are ordered so that the usually largest
  // cost shares come first.
  for (k = 0; k < PIX_OR_COPY_CODES_MAX; ++k) {
    merged[k] = a->literal_[k] + b->literal_[k];
  }
  cost += PopulationCost(merged, VP8LHistogramNumCodes(a));
  cost += ExtraCost(merged + 256, NUM_LENGTH_CODES);
  if (cost > cost_threshold) return cost;

  for (k = 0; k < 256; ++k) {
    merged[k] = a->red_[k] + b->red_[k];
  }
  cost += PopulationCost(merged, 256);
  if (cost > cost_threshold) return cost;

  for (k = 0; k < 256; ++k) {
    merged[k] = a->blue_[k] + b->blue_[k];
  }
  cost += PopulationCost(merged, 256);
  if (cost > cost_threshold) return cost;

  for (k = 0; k < NUM_DISTANCE_CODES; ++k) {
    merged[k] = a->distance_[k] + b->distance_[k];
  }
  cost += PopulationCost(merged, NUM_DISTANCE_CODES);
  cost += ExtraCost(merged, NUM_DISTANCE_CODES);
  if (cost > cost_threshold) return cost;

  for (k = 0; k < 256; ++k) {
    merged[k] = a->alpha_[k] + b->alpha_[k];
  }
  cost += PopulationCost(merged, 256);
  return cost;
}
// -----------------------------------------------------------------------------
static void HistogramBuildImage(int xsize, int histo_bits,
const VP8LBackwardRefs* const backward_refs,
VP8LHistogramSet* const image) {
@ -249,14 +359,15 @@ static uint32_t MyRand(uint32_t *seed) {
}
static int HistogramCombine(const VP8LHistogramSet* const in,
VP8LHistogramSet* const out, int num_pairs) {
VP8LHistogramSet* const out, int iter_mult,
int num_pairs, int num_tries_no_success) {
int ok = 0;
int i, iter;
uint32_t seed = 0;
int tries_with_no_success = 0;
const int min_cluster_size = 2;
int out_size = in->size;
const int outer_iters = in->size * 3;
const int outer_iters = in->size * iter_mult;
const int min_cluster_size = 2;
VP8LHistogram* const histos = (VP8LHistogram*)malloc(2 * sizeof(*histos));
VP8LHistogram* cur_combo = histos + 0; // trial merged histogram
VP8LHistogram* best_combo = histos + 1; // best merged histogram so far
@ -271,29 +382,26 @@ static int HistogramCombine(const VP8LHistogramSet* const in,
// Collapse similar histograms in 'out'.
for (iter = 0; iter < outer_iters && out_size >= min_cluster_size; ++iter) {
// We pick the best pair to be combined out of 'inner_iters' pairs.
double best_cost_diff = 0.;
int best_idx1 = 0, best_idx2 = 1;
int best_idx1 = -1, best_idx2 = 1;
int j;
const int num_tries = (num_pairs < out_size) ? num_pairs : out_size;
seed += iter;
for (j = 0; j < num_pairs; ++j) {
for (j = 0; j < num_tries; ++j) {
double curr_cost_diff;
// Choose two histograms at random and try to combine them.
const uint32_t idx1 = MyRand(&seed) % out_size;
const uint32_t tmp = ((j & 7) + 1) % (out_size - 1);
const uint32_t tmp = (j & 7) + 1;
const uint32_t diff = (tmp < 3) ? tmp : MyRand(&seed) % (out_size - 1);
const uint32_t idx2 = (idx1 + diff + 1) % out_size;
if (idx1 == idx2) {
continue;
}
*cur_combo = *out->histograms[idx1];
VP8LHistogramAdd(cur_combo, out->histograms[idx2]);
cur_combo->bit_cost_ = VP8LHistogramEstimateBits(cur_combo);
// Calculate cost reduction on combining.
curr_cost_diff = cur_combo->bit_cost_
- out->histograms[idx1]->bit_cost_
- out->histograms[idx2]->bit_cost_;
if (best_cost_diff > curr_cost_diff) { // found a better pair?
curr_cost_diff = HistogramAddEval(out->histograms[idx1],
out->histograms[idx2],
cur_combo, best_cost_diff);
if (curr_cost_diff < best_cost_diff) { // found a better pair?
{ // swap cur/best combo histograms
VP8LHistogram* const tmp_histo = cur_combo;
cur_combo = best_combo;
@ -305,7 +413,7 @@ static int HistogramCombine(const VP8LHistogramSet* const in,
}
}
if (best_cost_diff < 0.0) {
if (best_idx1 >= 0) {
*out->histograms[best_idx1] = *best_combo;
// swap best_idx2 slot with last one (which is now unused)
--out_size;
@ -315,7 +423,7 @@ static int HistogramCombine(const VP8LHistogramSet* const in,
}
tries_with_no_success = 0;
}
if (++tries_with_no_success >= 50) {
if (++tries_with_no_success >= num_tries_no_success) {
break;
}
}
@ -330,20 +438,11 @@ static int HistogramCombine(const VP8LHistogramSet* const in,
// -----------------------------------------------------------------------------
// Histogram refinement
// What is the bit cost of moving square_histogram from
// cur_symbol to candidate_symbol.
// TODO(skal): we don't really need to copy the histogram and Add(). Instead
// we just need VP8LDualHistogramEstimateBits(A, B) estimation function.
// What is the bit cost of moving square_histogram from cur_symbol to candidate.
static double HistogramDistance(const VP8LHistogram* const square_histogram,
const VP8LHistogram* const candidate) {
const double previous_bit_cost = candidate->bit_cost_;
double new_bit_cost;
VP8LHistogram modified_histo;
modified_histo = *candidate;
VP8LHistogramAdd(&modified_histo, square_histogram);
new_bit_cost = VP8LHistogramEstimateBits(&modified_histo);
return new_bit_cost - previous_bit_cost;
const VP8LHistogram* const candidate,
double cost_threshold) {
return HistogramAddThresh(candidate, square_histogram, cost_threshold);
}
// Find the best 'out' histogram for each of the 'in' histograms.
@ -354,11 +453,12 @@ static void HistogramRemap(const VP8LHistogramSet* const in,
int i;
for (i = 0; i < in->size; ++i) {
int best_out = 0;
double best_bits = HistogramDistance(in->histograms[i], out->histograms[0]);
double best_bits =
HistogramDistance(in->histograms[i], out->histograms[0], 1.e38);
int k;
for (k = 1; k < out->size; ++k) {
const double cur_bits =
HistogramDistance(in->histograms[i], out->histograms[k]);
HistogramDistance(in->histograms[i], out->histograms[k], best_bits);
if (cur_bits < best_bits) {
best_bits = cur_bits;
best_out = k;
@ -372,7 +472,7 @@ static void HistogramRemap(const VP8LHistogramSet* const in,
HistogramClear(out->histograms[i]);
}
for (i = 0; i < in->size; ++i) {
VP8LHistogramAdd(out->histograms[symbols[i]], in->histograms[i]);
HistogramAdd(in->histograms[i], out->histograms[symbols[i]]);
}
}
@ -384,8 +484,13 @@ int VP8LGetHistoImageSymbols(int xsize, int ysize,
int ok = 0;
const int histo_xsize = histo_bits ? VP8LSubSampleSize(xsize, histo_bits) : 1;
const int histo_ysize = histo_bits ? VP8LSubSampleSize(ysize, histo_bits) : 1;
const int num_histo_pairs = 10 + quality / 2; // For HistogramCombine().
const int histo_image_raw_size = histo_xsize * histo_ysize;
// Heuristic params for HistogramCombine().
const int num_tries_no_success = 8 + (quality >> 1);
const int iter_mult = (quality < 27) ? 1 : 1 + ((quality - 27) >> 4);
const int num_pairs = (quality < 25) ? 10 : (5 * quality) >> 3;
VP8LHistogramSet* const image_out =
VP8LAllocateHistogramSet(histo_image_raw_size, cache_bits);
if (image_out == NULL) return 0;
@ -393,7 +498,8 @@ int VP8LGetHistoImageSymbols(int xsize, int ysize,
// Build histogram image.
HistogramBuildImage(xsize, histo_bits, refs, image_out);
// Collapse similar histograms.
if (!HistogramCombine(image_out, image_in, num_histo_pairs)) {
if (!HistogramCombine(image_out, image_in, iter_mult, num_pairs,
num_tries_no_success)) {
goto Error;
}
// Find the optimal map from original histograms to the final ones.

@ -80,22 +80,6 @@ double VP8LHistogramEstimateBits(const VP8LHistogram* const p);
// represent the entropy code itself.
double VP8LHistogramEstimateBitsBulk(const VP8LHistogram* const p);
// Accumulates every population count of 'a' into the matching entry of 'p'
// (literal, distance, red, blue and alpha).
static WEBP_INLINE void VP8LHistogramAdd(VP8LHistogram* const p,
                                         const VP8LHistogram* const a) {
  int k;
  for (k = 0; k < 256; ++k) {
    p->red_[k] += a->red_[k];
    p->blue_[k] += a->blue_[k];
    p->alpha_[k] += a->alpha_[k];
  }
  for (k = 0; k < NUM_DISTANCE_CODES; ++k) {
    p->distance_[k] += a->distance_[k];
  }
  for (k = 0; k < PIX_OR_COPY_CODES_MAX; ++k) {
    p->literal_[k] += a->literal_[k];
  }
}
static WEBP_INLINE int VP8LHistogramNumCodes(const VP8LHistogram* const p) {
return 256 + NUM_LENGTH_CODES +
((p->palette_code_bits_ > 0) ? (1 << p->palette_code_bits_) : 0);

@ -290,8 +290,11 @@ int WebPPictureView(const WebPPicture* src,
dst->y = src->y + top * src->y_stride + left;
dst->u = src->u + (top >> 1) * src->uv_stride + (left >> 1);
dst->v = src->v + (top >> 1) * src->uv_stride + (left >> 1);
dst->y_stride = src->y_stride;
dst->uv_stride = src->uv_stride;
if (src->a != NULL) {
dst->a = src->a + top * src->a_stride + left;
dst->a_stride = src->a_stride;
}
#ifdef WEBP_EXPERIMENTAL_FEATURES
if (src->u0 != NULL) {
@ -299,10 +302,12 @@ int WebPPictureView(const WebPPicture* src,
IS_YUV_CSP(dst->colorspace, WEBP_YUV422) ? (left >> 1) : left;
dst->u0 = src->u0 + top * src->uv0_stride + left_pos;
dst->v0 = src->v0 + top * src->uv0_stride + left_pos;
dst->uv0_stride = src->uv0_stride;
}
#endif
} else {
dst->argb = src->argb + top * src->argb_stride + left;
dst->argb_stride = src->argb_stride;
}
return 1;
}
@ -457,7 +462,6 @@ int WebPPictureRescale(WebPPicture* pic, int width, int height) {
(uint8_t*)tmp.argb, width, height,
tmp.argb_stride * 4,
work, 4);
}
WebPPictureFree(pic);
free(work);
@ -801,11 +805,11 @@ int WebPPictureYUVAToARGB(WebPPicture* picture) {
// Insert alpha values if needed, in replacement for the default 0xff ones.
if (picture->colorspace & WEBP_CSP_ALPHA_BIT) {
for (y = 0; y < height; ++y) {
uint32_t* const dst = picture->argb + y * picture->argb_stride;
uint32_t* const argb_dst = picture->argb + y * picture->argb_stride;
const uint8_t* const src = picture->a + y * picture->a_stride;
int x;
for (x = 0; x < width; ++x) {
dst[x] = (dst[x] & 0x00ffffffu) | (src[x] << 24);
argb_dst[x] = (argb_dst[x] & 0x00ffffffu) | (src[x] << 24);
}
}
}
@ -906,67 +910,135 @@ void WebPCleanupTransparentArea(WebPPicture* pic) {
#undef SIZE
#undef SIZE2
//------------------------------------------------------------------------------
// local-min distortion
//
// For every pixel in the *reference* picture, we search for the local best
// match in the compressed image. This is not a symmetrical measure.
// search radius. Shouldn't be too large.
#define RADIUS 2

// Local-min squared error of one plane. For each of the w x h samples of
// 'ref', the closest value of 'src' is searched in the window of +/-RADIUS
// samples around the same position (clipped to the plane), and the squared
// difference to that best match is accumulated. The per-sample error is
// capped at 255^2. Returns the accumulated sum as a float.
static float AccumulateLSIM(const uint8_t* src, int src_stride,
                            const uint8_t* ref, int ref_stride,
                            int w, int h) {
  double sum = 0.;
  int row;
  for (row = 0; row < h; ++row) {
    // Vertical extent of the search window: [top, bottom).
    const int top = (row < RADIUS) ? 0 : row - RADIUS;
    const int bottom = (row + RADIUS + 1 < h) ? row + RADIUS + 1 : h;
    int col;
    for (col = 0; col < w; ++col) {
      // Horizontal extent of the search window: [left, right).
      const int left = (col < RADIUS) ? 0 : col - RADIUS;
      const int right = (col + RADIUS + 1 < w) ? col + RADIUS + 1 : w;
      const double target = (double)ref[row * ref_stride + col];
      double smallest = 255. * 255.;
      int wy;
      for (wy = top; wy < bottom; ++wy) {
        const uint8_t* const line = src + wy * src_stride;
        int wx;
        for (wx = left; wx < right; ++wx) {
          const double delta = line[wx] - target;
          const double err = delta * delta;
          if (err < smallest) smallest = err;
        }
      }
      sum += smallest;
    }
  }
  return (float)sum;
}
#undef RADIUS
//------------------------------------------------------------------------------
// Distortion
// Max value returned in case of exact similarity.
static const double kMinDistortion_dB = 99.;
static float GetPSNR(const double v) {
return (float)((v > 0.) ? -4.3429448 * log(v / (255 * 255.))
: kMinDistortion_dB);
}
int WebPPictureDistortion(const WebPPicture* pic1, const WebPPicture* pic2,
int WebPPictureDistortion(const WebPPicture* src, const WebPPicture* ref,
int type, float result[5]) {
int c;
DistoStats stats[5];
int has_alpha;
int uv_w, uv_h;
if (pic1 == NULL || pic2 == NULL ||
pic1->width != pic2->width || pic1->height != pic2->height ||
pic1->y == NULL || pic2->y == NULL ||
pic1->u == NULL || pic2->u == NULL ||
pic1->v == NULL || pic2->v == NULL ||
if (src == NULL || ref == NULL ||
src->width != ref->width || src->height != ref->height ||
src->y == NULL || ref->y == NULL ||
src->u == NULL || ref->u == NULL ||
src->v == NULL || ref->v == NULL ||
result == NULL) {
return 0;
}
// TODO(skal): provide distortion for ARGB too.
if (pic1->use_argb == 1 || pic1->use_argb != pic2->use_argb) {
if (src->use_argb == 1 || src->use_argb != ref->use_argb) {
return 0;
}
has_alpha = !!(pic1->colorspace & WEBP_CSP_ALPHA_BIT);
if (has_alpha != !!(pic2->colorspace & WEBP_CSP_ALPHA_BIT) ||
(has_alpha && (pic1->a == NULL || pic2->a == NULL))) {
has_alpha = !!(src->colorspace & WEBP_CSP_ALPHA_BIT);
if (has_alpha != !!(ref->colorspace & WEBP_CSP_ALPHA_BIT) ||
(has_alpha && (src->a == NULL || ref->a == NULL))) {
return 0;
}
memset(stats, 0, sizeof(stats));
VP8SSIMAccumulatePlane(pic1->y, pic1->y_stride,
pic2->y, pic2->y_stride,
pic1->width, pic1->height, &stats[0]);
VP8SSIMAccumulatePlane(pic1->u, pic1->uv_stride,
pic2->u, pic2->uv_stride,
(pic1->width + 1) >> 1, (pic1->height + 1) >> 1,
&stats[1]);
VP8SSIMAccumulatePlane(pic1->v, pic1->uv_stride,
pic2->v, pic2->uv_stride,
(pic1->width + 1) >> 1, (pic1->height + 1) >> 1,
&stats[2]);
if (has_alpha) {
VP8SSIMAccumulatePlane(pic1->a, pic1->a_stride,
pic2->a, pic2->a_stride,
pic1->width, pic1->height, &stats[3]);
}
for (c = 0; c <= 4; ++c) {
if (type == 1) {
const double v = VP8SSIMGet(&stats[c]);
result[c] = (float)((v < 1.) ? -10.0 * log10(1. - v)
: kMinDistortion_dB);
} else {
const double v = VP8SSIMGetSquaredError(&stats[c]);
result[c] = (float)((v > 0.) ? -4.3429448 * log(v / (255 * 255.))
: kMinDistortion_dB);
uv_w = HALVE(src->width);
uv_h = HALVE(src->height);
if (type >= 2) {
float sse[4];
sse[0] = AccumulateLSIM(src->y, src->y_stride,
ref->y, ref->y_stride, src->width, src->height);
sse[1] = AccumulateLSIM(src->u, src->uv_stride,
ref->u, ref->uv_stride, uv_w, uv_h);
sse[2] = AccumulateLSIM(src->v, src->uv_stride,
ref->v, ref->uv_stride, uv_w, uv_h);
sse[3] = has_alpha ? AccumulateLSIM(src->a, src->a_stride,
ref->a, ref->a_stride,
src->width, src->height)
: 0.f;
result[0] = GetPSNR(sse[0] / (src->width * src->height));
result[1] = GetPSNR(sse[1] / (uv_w * uv_h));
result[2] = GetPSNR(sse[2] / (uv_w * uv_h));
result[3] = GetPSNR(sse[3] / (src->width * src->height));
{
double total_sse = sse[0] + sse[1] + sse[2];
int total_pixels = src->width * src->height + 2 * uv_w * uv_h;
if (has_alpha) {
total_pixels += src->width * src->height;
total_sse += sse[3];
}
result[4] = GetPSNR(total_sse / total_pixels);
}
} else {
int c;
VP8SSIMAccumulatePlane(src->y, src->y_stride,
ref->y, ref->y_stride,
src->width, src->height, &stats[0]);
VP8SSIMAccumulatePlane(src->u, src->uv_stride,
ref->u, ref->uv_stride,
uv_w, uv_h, &stats[1]);
VP8SSIMAccumulatePlane(src->v, src->uv_stride,
ref->v, ref->uv_stride,
uv_w, uv_h, &stats[2]);
if (has_alpha) {
VP8SSIMAccumulatePlane(src->a, src->a_stride,
ref->a, ref->a_stride,
src->width, src->height, &stats[3]);
}
for (c = 0; c <= 4; ++c) {
if (type == 1) {
const double v = VP8SSIMGet(&stats[c]);
result[c] = (float)((v < 1.) ? -10.0 * log10(1. - v)
: kMinDistortion_dB);
} else {
const double v = VP8SSIMGetSquaredError(&stats[c]);
result[c] = GetPSNR(v);
}
// Accumulate forward
if (c < 4) VP8SSIMAddStats(&stats[c], &stats[4]);
}
// Accumulate forward
if (c < 4) VP8SSIMAddStats(&stats[c], &stats[4]);
}
return 1;
}

@ -27,6 +27,8 @@
#define SNS_TO_DQ 0.9 // Scaling constant between the sns value and the QP
// power-law modulation. Must be strictly less than 1.
#define I4_PENALTY 4000 // Rate-penalty for quick i4/i16 decision
#define MULT_8B(a, b) (((a) * (b) + 128) >> 8)
#if defined(__cplusplus) || defined(c_plusplus)
@ -224,28 +226,90 @@ static void SetupFilterStrength(VP8Encoder* const enc) {
// We want to emulate jpeg-like behaviour where the expected "good" quality
// is around q=75. Internally, our "good" middle is around c=50. So we
// map accordingly using linear piece-wise function
static double QualityToCompression(double q) {
const double c = q / 100.;
return (c < 0.75) ? c * (2. / 3.) : 2. * c - 1.;
static double QualityToCompression(double c) {
const double linear_c = (c < 0.75) ? c * (2. / 3.) : 2. * c - 1.;
// The file size roughly scales as pow(quantizer, 3.). Actually, the
// exponent is somewhere between 2.8 and 3.2, but we're mostly interested
// in the mid-quant range. So we scale the compressibility inversely to
// this power-law: quant ~= compression ^ 1/3. This law holds well for
// low quant. Finer modelling for high-quant would make use of kAcTable[]
// more explicitly.
const double v = pow(linear_c, 1 / 3.);
return v;
}
static double QualityToJPEGCompression(double c, double alpha) {
// We map the complexity 'alpha' and quality setting 'c' to a compression
// exponent empirically matched to the compression curve of libjpeg6b.
// On average, the WebP output size will be roughly similar to that of a
// JPEG file compressed with same quality factor.
const double amin = 0.30;
const double amax = 0.85;
const double exp_min = 0.4;
const double exp_max = 0.9;
const double slope = (exp_min - exp_max) / (amax - amin);
// Linearly interpolate 'expn' from exp_min to exp_max
// in the [amin, amax] range.
const double expn = (alpha > amax) ? exp_min
: (alpha < amin) ? exp_max
: exp_max + slope * (alpha - amin);
const double v = pow(c, expn);
return v;
}
// Returns 1 when the two segments share both the same quantizer (quant_) and
// the same filter strength (fstrength_), 0 otherwise.
static int SegmentsAreEquivalent(const VP8SegmentInfo* const S1,
                                 const VP8SegmentInfo* const S2) {
  if (S1->quant_ != S2->quant_) return 0;
  return (S1->fstrength_ == S2->fstrength_);
}
// Merges segments that are equivalent (see SegmentsAreEquivalent()): the
// distinct segments are compacted at the front of enc->dqm_[], every
// macroblock's segment_ id is remapped accordingly and the segment count in
// the segment header is reduced.
static void SimplifySegments(VP8Encoder* const enc) {
  int map[NUM_MB_SEGMENTS] = { 0, 1, 2, 3 };   // old id -> new id
  const int num_segments = enc->segment_hdr_.num_segments_;
  int num_final_segments = 1;    // segment #0 is always kept as is
  int src;

  for (src = 1; src < num_segments; ++src) {
    // Look for an already-retained segment equivalent to 'src'.
    int dst = 0;
    while (dst < num_final_segments &&
           !SegmentsAreEquivalent(&enc->dqm_[src], &enc->dqm_[dst])) {
      ++dst;
    }
    map[src] = dst;
    if (dst == num_final_segments) {   // none found: retain 'src' as new one
      if (num_final_segments != src) {
        enc->dqm_[num_final_segments] = enc->dqm_[src];
      }
      ++num_final_segments;
    }
  }

  if (num_final_segments >= num_segments) return;   // nothing was merged

  {
    const int num_mbs = enc->mb_w_ * enc->mb_h_;
    int i;
    // Remap the per-macroblock segment ids.
    for (i = 0; i < num_mbs; ++i) {
      VP8MBInfo* const mb = &enc->mb_info_[i];
      mb->segment_ = map[mb->segment_];
    }
    enc->segment_hdr_.num_segments_ = num_final_segments;
    // Replicate the last retained segment into the now-unused trailing
    // entries (mostly cosmetic).
    for (i = num_final_segments; i < num_segments; ++i) {
      enc->dqm_[i] = enc->dqm_[num_final_segments - 1];
    }
  }
}
void VP8SetSegmentParams(VP8Encoder* const enc, float quality) {
int i;
int dq_uv_ac, dq_uv_dc;
const int num_segments = enc->config_->segments;
const int num_segments = enc->segment_hdr_.num_segments_;
const double amp = SNS_TO_DQ * enc->config_->sns_strength / 100. / 128.;
const double c_base = QualityToCompression(quality);
const double Q = quality / 100.;
const double c_base = enc->config_->emulate_jpeg_size ?
QualityToJPEGCompression(Q, enc->alpha_ / 255.) :
QualityToCompression(Q);
for (i = 0; i < num_segments; ++i) {
// The file size roughly scales as pow(quantizer, 3.). Actually, the
// exponent is somewhere between 2.8 and 3.2, but we're mostly interested
// in the mid-quant range. So we scale the compressibility inversely to
// this power-law: quant ~= compression ^ 1/3. This law holds well for
// low quant. Finer modelling for high-quant would make use of kAcTable[]
// more explicitly.
// Additionally, we modulate the base exponent 1/3 to accommodate for the
// quantization susceptibility and allow denser segments to be quantized
// more.
const double expn = (1. - amp * enc->dqm_[i].alpha_) / 3.;
// We modulate the base coefficient to accommodate for the quantization
// susceptibility and allow denser segments to be quantized more.
const double expn = 1. - amp * enc->dqm_[i].alpha_;
const double c = pow(c_base, expn);
const int q = (int)(127. * (1. - c));
assert(expn > 0.);
@ -281,9 +345,11 @@ void VP8SetSegmentParams(VP8Encoder* const enc, float quality) {
enc->dq_uv_dc_ = dq_uv_dc;
enc->dq_uv_ac_ = dq_uv_ac;
SetupMatrices(enc);
SetupFilterStrength(enc); // initialize segments' filtering, eventually
if (num_segments > 1) SimplifySegments(enc);
SetupMatrices(enc); // finalize quantization matrices
}
//------------------------------------------------------------------------------
@ -709,7 +775,7 @@ static void PickBestIntra16(VP8EncIterator* const it, VP8ModeScore* const rd) {
int mode;
rd->mode_i16 = -1;
for (mode = 0; mode < 4; ++mode) {
for (mode = 0; mode < NUM_PRED_MODES; ++mode) {
uint8_t* const tmp_dst = it->yuv_out2_ + Y_OFF; // scratch buffer
int nz;
@ -838,7 +904,7 @@ static void PickBestUV(VP8EncIterator* const it, VP8ModeScore* const rd) {
rd->mode_uv = -1;
InitScore(&rd_best);
for (mode = 0; mode < 4; ++mode) {
for (mode = 0; mode < NUM_PRED_MODES; ++mode) {
VP8ModeScore rd_uv;
// Reconstruct
@ -867,10 +933,10 @@ static void PickBestUV(VP8EncIterator* const it, VP8ModeScore* const rd) {
static void SimpleQuantize(VP8EncIterator* const it, VP8ModeScore* const rd) {
const VP8Encoder* const enc = it->enc_;
const int i16 = (it->mb_->type_ == 1);
const int is_i16 = (it->mb_->type_ == 1);
int nz = 0;
if (i16) {
if (is_i16) {
nz = ReconstructIntra16(it, rd, it->yuv_out_ + Y_OFF, it->preds_[0]);
} else {
VP8IteratorStartI4(it);
@ -889,11 +955,66 @@ static void SimpleQuantize(VP8EncIterator* const it, VP8ModeScore* const rd) {
rd->nz = nz;
}
// Refines the intra16 and/or intra4 prediction sub-modes using distortion
// (SSE against the source) only; the rate is not evaluated.
// If 'try_both_i4_i16' is true, both the intra16 and the intra4 choices are
// examined regardless of the macroblock's current type. Otherwise only the
// sub-mode(s) of the current type are refined.
static void DistoRefine(VP8EncIterator* const it, int try_both_i4_i16) {
  const int is_i16 = (it->mb_->type_ == 1);
  score_t best_score = MAX_COST;

  if (try_both_i4_i16 || is_i16) {
    const uint8_t* const src = it->yuv_in_ + Y_OFF;
    int best_mode = -1;
    int mode;
    for (mode = 0; mode < NUM_PRED_MODES; ++mode) {
      const uint8_t* const ref = it->yuv_p_ + VP8I16ModeOffsets[mode];
      const score_t score = VP8SSE16x16(src, ref);
      if (score < best_score) {
        best_score = score;
        best_mode = mode;
      }
    }
    VP8SetIntra16Mode(it, best_mode);
  }

  if (!try_both_i4_i16 && is_i16) return;   // intra4 not to be examined

  {
    uint8_t modes_i4[16];
    // The rate is not evaluated here: it is only accounted for through a
    // constant penalty (i4 mode usually needs more bits compared to i16).
    score_t score_i4 = (score_t)I4_PENALTY;

    VP8IteratorStartI4(it);
    for (;;) {
      const uint8_t* const src = it->yuv_in_ + Y_OFF + VP8Scan[it->i4_];
      score_t best_sub_score = MAX_COST;
      int best_sub_mode = -1;
      int mode;

      // TODO(skal): we don't really need the prediction pixels here,
      // but just the distortion against 'src'.
      VP8MakeIntra4Preds(it);
      for (mode = 0; mode < NUM_BMODES; ++mode) {
        const uint8_t* const ref = it->yuv_p_ + VP8I4ModeOffsets[mode];
        const score_t score = VP8SSE4x4(src, ref);
        if (score < best_sub_score) {
          best_sub_score = score;
          best_sub_mode = mode;
        }
      }
      modes_i4[it->i4_] = best_sub_mode;
      score_i4 += best_sub_score;
      // Early-out as soon as intra4 can no longer beat the best score so far.
      if (score_i4 >= best_score) break;
      if (!VP8IteratorRotateI4(it, it->yuv_in_ + Y_OFF)) break;
    }
    if (score_i4 < best_score) {
      VP8SetIntra4Mode(it, modes_i4);
    }
  }
}
//------------------------------------------------------------------------------
// Entry point
int VP8Decimate(VP8EncIterator* const it, VP8ModeScore* const rd, int rd_opt) {
int VP8Decimate(VP8EncIterator* const it, VP8ModeScore* const rd,
VP8RDLevel rd_opt) {
int is_skipped;
const int method = it->enc_->method_;
InitScore(rd);
@ -902,22 +1023,21 @@ int VP8Decimate(VP8EncIterator* const it, VP8ModeScore* const rd, int rd_opt) {
VP8MakeLuma16Preds(it);
VP8MakeChroma8Preds(it);
// for rd_opt = 2, we perform trellis-quant on the final decision only.
// for rd_opt > 2, we use it for every scoring (=much slower).
if (rd_opt > 0) {
it->do_trellis_ = (rd_opt > 2);
if (rd_opt > RD_OPT_NONE) {
it->do_trellis_ = (rd_opt >= RD_OPT_TRELLIS_ALL);
PickBestIntra16(it, rd);
if (it->enc_->method_ >= 2) {
if (method >= 2) {
PickBestIntra4(it, rd);
}
PickBestUV(it, rd);
if (rd_opt == 2) {
if (rd_opt == RD_OPT_TRELLIS) { // finish off with trellis-optim now
it->do_trellis_ = 1;
SimpleQuantize(it, rd);
}
} else {
// TODO: for method_ == 2, pick the best intra4/intra16 based on SSE
it->do_trellis_ = (it->enc_->method_ == 2);
// For method == 2, pick the best intra4/intra16 based on SSE (~tad slower).
// For method <= 1, we refine intra4 or intra16 (but don't re-examine mode).
DistoRefine(it, (method >= 2));
SimpleQuantize(it, rd);
}
is_skipped = (rd->nz == 0);

@ -11,7 +11,9 @@
#include <assert.h>
#include "../webp/format_constants.h"
#include "../utils/utils.h"
#include "../webp/format_constants.h" // RIFF constants
#include "../webp/mux_types.h" // ALPHA_FLAG
#include "./vp8enci.h"
#if defined(__cplusplus) || defined(c_plusplus)
@ -21,25 +23,12 @@ extern "C" {
//------------------------------------------------------------------------------
// Helper functions
// TODO(later): Move to webp/format_constants.h?
// Stores the 24 lower bits of 'val' into data[0..2], least-significant byte
// first (little-endian).
static void PutLE24(uint8_t* const data, uint32_t val) {
  int i;
  for (i = 0; i < 3; ++i) {
    data[i] = (val >> (8 * i)) & 0xff;
  }
}
// Stores 'val' into data[0..3], least-significant byte first (little-endian).
static void PutLE32(uint8_t* const data, uint32_t val) {
  data[3] = (val >> 24) & 0xff;   // most-significant byte
  PutLE24(data, val);             // three lower bytes
}
// Returns 1 if a VP8X chunk is needed, 0 otherwise.
// Currently the only case is when the encoder has alpha; this could change
// in the future.
static int IsVP8XNeeded(const VP8Encoder* const enc) {
  return (enc->has_alpha_ != 0) ? 1 : 0;
}
// Emits a single zero byte through the picture's writer callback.
// Returns 1 if the writer reported success (non-zero), 0 otherwise.
static int PutPaddingByte(const WebPPicture* const pic) {
  const uint8_t pad_byte[1] = { 0 };
  if (!pic->writer(pad_byte, 1, pic)) return 0;
  return 1;
}
@ -73,14 +62,14 @@ static WebPEncodingError PutVP8XHeader(const VP8Encoder* const enc) {
assert(pic->width <= MAX_CANVAS_SIZE && pic->height <= MAX_CANVAS_SIZE);
if (enc->has_alpha_) {
flags |= ALPHA_FLAG_BIT;
flags |= ALPHA_FLAG;
}
PutLE32(vp8x + TAG_SIZE, VP8X_CHUNK_SIZE);
PutLE32(vp8x + CHUNK_HEADER_SIZE, flags);
PutLE24(vp8x + CHUNK_HEADER_SIZE + 4, pic->width - 1);
PutLE24(vp8x + CHUNK_HEADER_SIZE + 7, pic->height - 1);
if(!pic->writer(vp8x, sizeof(vp8x), pic)) {
if (!pic->writer(vp8x, sizeof(vp8x), pic)) {
return VP8_ENC_ERROR_BAD_WRITE;
}
return VP8_ENC_OK;
@ -327,7 +316,9 @@ static size_t GeneratePartition0(VP8Encoder* const enc) {
PutSegmentHeader(bw, enc);
PutFilterHeader(bw, &enc->filter_hdr_);
VP8PutValue(bw, enc->config_->partitions, 2);
VP8PutValue(bw, enc->num_parts_ == 8 ? 3 :
enc->num_parts_ == 4 ? 2 :
enc->num_parts_ == 2 ? 1 : 0, 2);
PutQuant(bw, enc);
VP8PutBitUniform(bw, 0); // no proba update
VP8WriteProbas(bw, &enc->proba_);

@ -0,0 +1,254 @@
// Copyright 2011 Google Inc. All Rights Reserved.
//
// This code is licensed under the same terms as WebM:
// Software License Agreement: http://www.webmproject.org/license/software/
// Additional IP Rights Grant: http://www.webmproject.org/license/additional/
// -----------------------------------------------------------------------------
//
// Paginated token buffer
//
// A 'token' is a bit value associated with a probability, either fixed
// or a later-to-be-determined after statistics have been collected.
// For dynamic probability, we just record the slot id (idx) for the probability
// value in the final probability array (uint8_t* probas in VP8EmitTokens).
//
// Author: Skal (pascal.massimino@gmail.com)
#include <assert.h>
#include <stdlib.h>
#include <string.h>
#include "./vp8enci.h"
#if defined(__cplusplus) || defined(c_plusplus)
extern "C" {
#endif
#if !defined(DISABLE_TOKEN_BUFFER)
// we use pages to reduce the number of memcpy()
#define MAX_NUM_TOKEN 8192 // max number of token per page
#define FIXED_PROBA_BIT (1u << 14)
// One page of the token buffer: a fixed-size array of packed 16-bit tokens
// plus a link to the next page. A page is filled from the highest index down
// to 0 (AddToken() / AddConstantToken() store at index --left_).
struct VP8Tokens {
uint16_t tokens_[MAX_NUM_TOKEN]; // bit #15: the coded bit
// bit #14: FIXED_PROBA_BIT; set if bits 0..13 hold a
//          constant proba, clear if they hold a slot idx
// bits 0..13: slot idx or constant proba
VP8Tokens* next_;   // next page, or NULL for the last page
};
//------------------------------------------------------------------------------
// Puts 'b' in the empty state: no page, no free slot, no error.
// Existing pages (if any) are NOT freed; use VP8TBufferClear() for that.
void VP8TBufferInit(VP8TBuffer* const b) {
  b->error_ = 0;
  b->left_ = 0;                  // no room: first token will request a page
  b->tokens_ = NULL;
  b->pages_ = NULL;
  b->last_page_ = &b->pages_;    // first allocated page becomes the head
}
// Frees every page of 'b' and resets it to the empty state.
// A NULL 'b' is accepted and ignored.
void VP8TBufferClear(VP8TBuffer* const b) {
  const VP8Tokens* page;
  if (b == NULL) return;
  page = b->pages_;
  while (page != NULL) {
    const VP8Tokens* const following = page->next_;   // read before freeing
    free((void*)page);
    page = following;
  }
  VP8TBufferInit(b);
}
// Allocates a fresh page, appends it to the page list of 'b' and makes it the
// current page (all MAX_NUM_TOKEN slots free).
// Returns 1 on success. Returns 0, with b->error_ set, if the allocation
// failed or if the buffer was already in error state (no allocation is
// attempted in that case).
static int TBufferNewPage(VP8TBuffer* const b) {
  VP8Tokens* page = NULL;
  if (!b->error_) {
    page = (VP8Tokens*)malloc(sizeof(*page));
  }
  if (page == NULL) {
    b->error_ = 1;
    return 0;
  }
  page->next_ = NULL;
  // Link at the tail, then move the tail pointer to the new page's link.
  *b->last_page_ = page;
  b->last_page_ = &page->next_;
  b->tokens_ = page->tokens_;
  b->left_ = MAX_NUM_TOKEN;
  return 1;
}
//------------------------------------------------------------------------------
#define TOKEN_ID(t, b, ctx, p) \
((p) + NUM_PROBAS * ((ctx) + NUM_CTX * ((b) + NUM_BANDS * (t))))
// Records 'bit' together with the slot index 'proba_idx' of its (not yet
// known) probability. Always returns 'bit', even when the token could not be
// stored because a page allocation failed (the failure is then latched in
// b->error_ by TBufferNewPage()).
static WEBP_INLINE int AddToken(VP8TBuffer* const b,
                                int bit, uint32_t proba_idx) {
  assert(proba_idx < FIXED_PROBA_BIT);
  assert(bit == 0 || bit == 1);
  if (b->left_ <= 0 && !TBufferNewPage(b)) {
    return bit;   // no room and no new page: token is dropped
  }
  b->left_ -= 1;
  b->tokens_[b->left_] = (bit << 15) | proba_idx;
  return bit;
}
// Records 'bit' together with the constant probability 'proba' (< 256) it must
// be coded with; the token is flagged with FIXED_PROBA_BIT. The token is
// silently dropped if a page allocation fails (b->error_ is then set by
// TBufferNewPage()).
static WEBP_INLINE void AddConstantToken(VP8TBuffer* const b,
                                         int bit, int proba) {
  assert(proba < 256);
  assert(bit == 0 || bit == 1);
  if (b->left_ <= 0 && !TBufferNewPage(b)) {
    return;
  }
  b->left_ -= 1;
  b->tokens_[b->left_] = (bit << 15) | FIXED_PROBA_BIT | proba;
}
// Records into 'tokens' the sequence of bits coding the coefficients
// coeffs[first..15], without knowing the probabilities yet. Each bit is stored
// either with the slot index of its probability (TOKEN_ID(type, band, ctx, 0)
// + position in the token tree) or with a constant probability.
//   ctx        : initial context. It is updated after each coefficient:
//                0 after a zero, 1 after +/-1, 2 after a larger magnitude.
//   coeff_type : first component of the probability slot index.
//   first      : index of the first coefficient to record.
//   last       : compared to the current position to record the "more
//                coefficients follow" bit; a negative value means there is
//                nothing to code.
// Returns 0 if 'last' is negative (only the initial bit is recorded), 1
// otherwise. Allocation failures are not reported through the return value:
// they are latched in tokens->error_ by TBufferNewPage().
int VP8RecordCoeffTokens(int ctx, int coeff_type, int first, int last,
const int16_t* const coeffs,
VP8TBuffer* const tokens) {
int n = first;
// NOTE(review): 'first' is used directly as the band index here, whereas the
// following positions go through VP8EncBands[]; presumably both agree for the
// possible values of 'first' -- confirm against the VP8EncBands[] table.
uint32_t base_id = TOKEN_ID(coeff_type, n, ctx, 0);
// Initial "block is not empty" bit.
if (!AddToken(tokens, last >= 0, base_id + 0)) {
return 0;
}
while (n < 16) {
// 'n' is incremented right away: from here on it designates the NEXT position.
const int c = coeffs[n++];
const int sign = c < 0;
int v = sign ? -c : c;
// Zero coefficient: switch to context 0 and go straight to the next
// coefficient (no sign and no "more coefficients" bit is recorded).
if (!AddToken(tokens, v != 0, base_id + 1)) {
ctx = 0;
base_id = TOKEN_ID(coeff_type, VP8EncBands[n], ctx, 0);
continue;
}
// v == 1: nothing more to record for the magnitude.
if (!AddToken(tokens, v > 1, base_id + 2)) {
ctx = 1;
} else {
// v in [2..4]
if (!AddToken(tokens, v > 4, base_id + 3)) {
if (AddToken(tokens, v != 2, base_id + 4))
AddToken(tokens, v == 4, base_id + 5);
// v in [5..10]
} else if (!AddToken(tokens, v > 10, base_id + 6)) {
if (!AddToken(tokens, v > 6, base_id + 7)) {
// v is 5 or 6
AddConstantToken(tokens, v == 6, 159);
} else {
// v in [7..10]: two bits with constant probabilities
AddConstantToken(tokens, v >= 9, 165);
AddConstantToken(tokens, !(v & 1), 145);
}
} else {
// v >= 11: two bits select the category, then the residual (v minus the
// category's base value) is recorded as raw bits, most significant bit
// first, each with a constant probability read from the category table.
int mask;
const uint8_t* tab;
if (v < 3 + (8 << 1)) { // VP8Cat3 (3b): v in [11..18]
AddToken(tokens, 0, base_id + 8);
AddToken(tokens, 0, base_id + 9);
v -= 3 + (8 << 0);
mask = 1 << 2;
tab = VP8Cat3;
} else if (v < 3 + (8 << 2)) { // VP8Cat4 (4b): v in [19..34]
AddToken(tokens, 0, base_id + 8);
AddToken(tokens, 1, base_id + 9);
v -= 3 + (8 << 1);
mask = 1 << 3;
tab = VP8Cat4;
} else if (v < 3 + (8 << 3)) { // VP8Cat5 (5b): v in [35..66]
AddToken(tokens, 1, base_id + 8);
AddToken(tokens, 0, base_id + 10);
v -= 3 + (8 << 2);
mask = 1 << 4;
tab = VP8Cat5;
} else { // VP8Cat6 (11b): v >= 67
AddToken(tokens, 1, base_id + 8);
AddToken(tokens, 1, base_id + 10);
v -= 3 + (8 << 3);
mask = 1 << 10;
tab = VP8Cat6;
}
while (mask) {
AddConstantToken(tokens, !!(v & mask), *tab++);
mask >>= 1;
}
}
ctx = 2;
}
// Sign bit, recorded with the constant probability 128.
AddConstantToken(tokens, sign, 128);
// Slot base for the next position, using the updated context.
base_id = TOKEN_ID(coeff_type, VP8EncBands[n], ctx, 0);
// Stop at the end of the block, or record whether more coefficients follow.
if (n == 16 || !AddToken(tokens, n <= last, base_id + 0)) {
return 1; // EOB
}
}
return 1;
}
#undef TOKEN_ID
//------------------------------------------------------------------------------
// This function works, but isn't currently used. Saved for later.
#if 0
// Accumulates one occurrence of 'bit' into the packed counter '*stats':
// the lower 16 bits hold the count of recorded bits, the upper 16 bits the
// total count. When the total is about to overflow, both halves are halved
// beforehand.
static void Record(int bit, proba_t* const stats) {
  proba_t packed = *stats;
  const int overflow_inbound = (packed >= 0xffff0000u);
  if (overflow_inbound) {
    packed = ((packed + 1u) >> 1) & 0x7fff7fffu;   // -> divide the stats by 2.
  }
  packed += 0x00010000u + bit;
  *stats = packed;
}
// Walks all the recorded tokens of 'b' and accumulates, for each token that
// refers to a probability slot (i.e. not a constant-proba token), its bit into
// stats[slot] using Record().
void VP8TokenToStats(const VP8TBuffer* const b, proba_t* const stats) {
  const VP8Tokens* page;
  for (page = b->pages_; page != NULL; page = page->next_) {
    // Only the last page can be partially filled: its used tokens are
    // tokens_[left_ .. MAX_NUM_TOKEN - 1].
    const int first_used = (page->next_ == NULL) ? b->left_ : 0;
    int n;
    for (n = MAX_NUM_TOKEN - 1; n >= first_used; --n) {
      const uint16_t token = page->tokens_[n];
      if (token & FIXED_PROBA_BIT) continue;   // constant proba: no stats
      Record((token >> 15) & 1, stats + (token & 0x3fffu));
    }
  }
}
#endif // 0
//------------------------------------------------------------------------------
// Final coding pass, with known probabilities.
// Writes every recorded token of 'b' to the bit-writer 'bw', in recording
// order. Tokens flagged with FIXED_PROBA_BIT carry their own 8-bit constant
// probability; the others are coded with probas[slot], where 'slot' is the
// 14-bit index stored in the token.
// If 'final_pass' is true, the pages are freed while being emitted and the
// buffer is left in the empty (initialized) state, so that it holds no
// reference to the freed memory and can safely be reused or cleared.
// Returns 0 (and emits nothing) if the buffer is in error state, 1 otherwise.
int VP8EmitTokens(VP8TBuffer* const b, VP8BitWriter* const bw,
                  const uint8_t* const probas, int final_pass) {
  const VP8Tokens* p = b->pages_;
  if (b->error_) return 0;
  while (p != NULL) {
    const VP8Tokens* const next = p->next_;   // read before 'p' is freed
    // Only the last page can be partially filled: its used tokens are
    // tokens_[left_ .. MAX_NUM_TOKEN - 1]. Pages are filled downward, so
    // walking from the top index restores the recording order.
    const int N = (next == NULL) ? b->left_ : 0;
    int n = MAX_NUM_TOKEN;
    while (n-- > N) {
      const uint16_t token = p->tokens_[n];
      const int bit = (token >> 15) & 1;
      if (token & FIXED_PROBA_BIT) {
        VP8PutBit(bw, bit, token & 0xffu);  // constant proba
      } else {
        VP8PutBit(bw, bit, probas[token & 0x3fffu]);
      }
    }
    if (final_pass) free((void*)p);
    p = next;
  }
  if (final_pass) {
    // All pages are gone: reset pages_ AND last_page_ / tokens_ / left_,
    // which would otherwise keep pointing into freed memory.
    VP8TBufferInit(b);
  }
  return 1;
}
//------------------------------------------------------------------------------
#else // DISABLE_TOKEN_BUFFER
// Variant used when the token buffer is compiled out (DISABLE_TOKEN_BUFFER):
// does nothing and leaves '*b' untouched.
void VP8TBufferInit(VP8TBuffer* const b) {
(void)b;   // unused
}
// Variant used when the token buffer is compiled out (DISABLE_TOKEN_BUFFER):
// there is no page to release, so this does nothing.
void VP8TBufferClear(VP8TBuffer* const b) {
(void)b;   // unused
}
#endif // !DISABLE_TOKEN_BUFFER
#if defined(__cplusplus) || defined(c_plusplus)
} // extern "C"
#endif

@ -16,6 +16,7 @@
#include "../webp/encode.h"
#include "../dsp/dsp.h"
#include "../utils/bit_writer.h"
#include "../utils/thread.h"
#if defined(__cplusplus) || defined(c_plusplus)
extern "C" {
@ -26,11 +27,8 @@ extern "C" {
// version numbers
#define ENC_MAJ_VERSION 0
#define ENC_MIN_VERSION 2
#define ENC_REV_VERSION 1
// size of histogram used by CollectHistogram.
#define MAX_COEFF_THRESH 64
#define ENC_MIN_VERSION 3
#define ENC_REV_VERSION 0
// intra prediction modes
enum { B_DC_PRED = 0, // 4x4 modes
@ -47,7 +45,8 @@ enum { B_DC_PRED = 0, // 4x4 modes
// Luma16 or UV modes
DC_PRED = B_DC_PRED, V_PRED = B_VE_PRED,
H_PRED = B_HE_PRED, TM_PRED = B_TM_PRED
H_PRED = B_HE_PRED, TM_PRED = B_TM_PRED,
NUM_PRED_MODES = 4
};
enum { NUM_MB_SEGMENTS = 4,
@ -56,10 +55,18 @@ enum { NUM_MB_SEGMENTS = 4,
NUM_BANDS = 8,
NUM_CTX = 3,
NUM_PROBAS = 11,
MAX_LF_LEVELS = 64, // Maximum loop filter level
MAX_VARIABLE_LEVEL = 67 // last (inclusive) level with variable cost
MAX_LF_LEVELS = 64, // Maximum loop filter level
MAX_VARIABLE_LEVEL = 67, // last (inclusive) level with variable cost
MAX_LEVEL = 2047 // max level (note: max codable is 2047 + 67)
};
// The values are ordered by increasing amount of rate-distortion work, so
// that levels can be compared with relational operators
// (e.g. 'rd_opt > RD_OPT_NONE' or 'rd_opt >= RD_OPT_TRELLIS_ALL').
typedef enum { // Rate-distortion optimization levels
RD_OPT_NONE = 0, // no rd-opt
RD_OPT_BASIC = 1, // basic scoring (no trellis)
RD_OPT_TRELLIS = 2, // perform trellis-quant on the final decision only
RD_OPT_TRELLIS_ALL = 3 // trellis-quant for every scoring (much slower)
} VP8RDLevel;
// YUV-cache parameters. Cache is 16-pixels wide.
// The original or reconstructed samples can be accessed using VP8Scan[]
// The predicted blocks can be accessed using offsets to yuv_p_ and
@ -160,7 +167,17 @@ typedef int64_t score_t; // type used for scores, rate, distortion
// Fixed-point scaling: returns (n * iQ + B) shifted right by QFIX bits, where
// 'B' is the bias added before the shift.
static WEBP_INLINE int QUANTDIV(int n, int iQ, int B) {
  const int biased = n * iQ + B;
  return biased >> QFIX;
}
extern const uint8_t VP8Zigzag[16];
// size of histogram used by CollectHistogram.
#define MAX_COEFF_THRESH 31
// Histogram used by CollectHistogram: one counter per bin, for bins
// 0..MAX_COEFF_THRESH inclusive (hence the '+ 1').
typedef struct VP8Histogram VP8Histogram;
struct VP8Histogram {
// TODO(skal): we only need to store the max_value and last_non_zero actually.
int distribution[MAX_COEFF_THRESH + 1];
};
// Uncomment the following to remove token-buffer code:
// #define DISABLE_TOKEN_BUFFER
//------------------------------------------------------------------------------
// Headers
@ -314,44 +331,37 @@ void VP8SetSegment(const VP8EncIterator* const it, int segment);
//------------------------------------------------------------------------------
// Paginated token buffer
// WIP: #define USE_TOKEN_BUFFER
#ifdef USE_TOKEN_BUFFER
#define MAX_NUM_TOKEN 2048
typedef struct VP8Tokens VP8Tokens;
struct VP8Tokens {
uint16_t tokens_[MAX_NUM_TOKEN]; // bit#15: bit, bits 0..14: slot
int left_;
VP8Tokens* next_;
};
typedef struct VP8Tokens VP8Tokens; // struct details in token.c
typedef struct {
VP8Tokens* rows_;
uint16_t* tokens_; // set to (*last_)->tokens_
VP8Tokens** last_;
int left_;
int error_; // true in case of malloc error
#if !defined(DISABLE_TOKEN_BUFFER)
VP8Tokens* pages_; // first page
VP8Tokens** last_page_; // last page
uint16_t* tokens_; // set to (*last_page_)->tokens_
int left_; // how many free tokens left before the page is full.
#endif
int error_; // true in case of malloc error
} VP8TBuffer;
void VP8TBufferInit(VP8TBuffer* const b); // initialize an empty buffer
int VP8TBufferNewPage(VP8TBuffer* const b); // allocate a new page
void VP8TBufferClear(VP8TBuffer* const b); // de-allocate memory
int VP8EmitTokens(const VP8TBuffer* const b, VP8BitWriter* const bw,
const uint8_t* const probas);
static WEBP_INLINE int VP8AddToken(VP8TBuffer* const b,
int bit, int proba_idx) {
if (b->left_ > 0 || VP8TBufferNewPage(b)) {
const int slot = --b->left_;
b->tokens_[slot] = (bit << 15) | proba_idx;
}
return bit;
}
void VP8TBufferClear(VP8TBuffer* const b); // de-allocate pages memory
#if !defined(DISABLE_TOKEN_BUFFER)
#endif // USE_TOKEN_BUFFER
// Finalizes bitstream when probabilities are known.
// Deletes the allocated token memory if final_pass is true.
int VP8EmitTokens(VP8TBuffer* const b, VP8BitWriter* const bw,
const uint8_t* const probas, int final_pass);
// record the coding of coefficients without knowing the probabilities yet
int VP8RecordCoeffTokens(int ctx, int coeff_type, int first, int last,
const int16_t* const coeffs,
VP8TBuffer* const tokens);
// unused for now
void VP8TokenToStats(const VP8TBuffer* const b, proba_t* const stats);
#endif // !DISABLE_TOKEN_BUFFER
//------------------------------------------------------------------------------
// VP8Encoder
@ -376,6 +386,7 @@ struct VP8Encoder {
// per-partition boolean decoders.
VP8BitWriter bw_; // part0
VP8BitWriter parts_[MAX_NUM_PARTITIONS]; // token partitions
VP8TBuffer tokens_; // token buffer
int percent_; // for progress
@ -383,6 +394,7 @@ struct VP8Encoder {
int has_alpha_;
uint8_t* alpha_data_; // non-NULL if transparency is present
uint32_t alpha_data_size_;
WebPWorker alpha_worker_;
// enhancement layer
int use_layer_;
@ -394,6 +406,7 @@ struct VP8Encoder {
VP8SegmentInfo dqm_[NUM_MB_SEGMENTS];
int base_quant_; // nominal quantizer value. Only used
// for relative coding of segments' quant.
int alpha_; // global susceptibility (<=> complexity)
int uv_alpha_; // U/V quantization susceptibility
// global offset of quantizers, shared by all segments
int dq_y1_dc_;
@ -409,9 +422,12 @@ struct VP8Encoder {
int block_count_[3];
// quality/speed settings
int method_; // 0=fastest, 6=best/slowest.
int rd_opt_level_; // Deduced from method_.
int max_i4_header_bits_; // partition #0 safeness factor
int method_; // 0=fastest, 6=best/slowest.
VP8RDLevel rd_opt_level_; // Deduced from method_.
int max_i4_header_bits_; // partition #0 safeness factor
int thread_level_; // derived from config->thread_level
int do_search_; // derived from config->target_XXX
int use_tokens_; // if true, use token buffer
// Memory
VP8MBInfo* mb_info_; // contextual macroblock infos (mb_w_ + 1)
@ -455,6 +471,11 @@ void VP8EncFreeBitWriters(VP8Encoder* const enc);
// in frame.c
extern const uint8_t VP8EncBands[16 + 1];
extern const uint8_t VP8Cat3[];
extern const uint8_t VP8Cat4[];
extern const uint8_t VP8Cat5[];
extern const uint8_t VP8Cat6[];
// Form all the four Intra16x16 predictions in the yuv_p_ cache
void VP8MakeLuma16Preds(const VP8EncIterator* const it);
// Form all the four Chroma8x8 predictions in the yuv_p_ cache
@ -466,9 +487,9 @@ void VP8MakeIntra4Preds(const VP8EncIterator* const it);
int VP8GetCostLuma16(VP8EncIterator* const it, const VP8ModeScore* const rd);
int VP8GetCostLuma4(VP8EncIterator* const it, const int16_t levels[16]);
int VP8GetCostUV(VP8EncIterator* const it, const VP8ModeScore* const rd);
// Main stat / coding passes
// Main coding calls
int VP8EncLoop(VP8Encoder* const enc);
int VP8StatLoop(VP8Encoder* const enc);
int VP8EncTokenLoop(VP8Encoder* const enc);
// in webpenc.c
// Assign an error code to a picture. Return false for convenience.
@ -485,12 +506,14 @@ int VP8EncAnalyze(VP8Encoder* const enc);
// Sets up segment's quantization values, base_quant_ and filter strengths.
void VP8SetSegmentParams(VP8Encoder* const enc, float quality);
// Pick best modes and fills the levels. Returns true if skipped.
int VP8Decimate(VP8EncIterator* const it, VP8ModeScore* const rd, int rd_opt);
int VP8Decimate(VP8EncIterator* const it, VP8ModeScore* const rd,
VP8RDLevel rd_opt);
// in alpha.c
void VP8EncInitAlpha(VP8Encoder* const enc); // initialize alpha compression
int VP8EncStartAlpha(VP8Encoder* const enc); // start alpha coding process
int VP8EncFinishAlpha(VP8Encoder* const enc); // finalize compressed data
void VP8EncDeleteAlpha(VP8Encoder* const enc); // delete compressed data
int VP8EncDeleteAlpha(VP8Encoder* const enc); // delete compressed data
// in layer.c
void VP8EncInitLayer(VP8Encoder* const enc); // init everything

@ -37,7 +37,8 @@ extern "C" {
// Comparison callback (qsort()-style) for two colors stored as uint32_t.
// Returns -1, 0 or 1 when the value pointed to by 'p1' is respectively lower
// than, equal to or greater than the one pointed to by 'p2'. Returning 0 for
// equal values keeps the ordering consistent even if an element happens to be
// compared with an equal one.
static int CompareColors(const void* p1, const void* p2) {
  const uint32_t a = *(const uint32_t*)p1;
  const uint32_t b = *(const uint32_t*)p2;
  return (a < b) ? -1 : (a > b) ? 1 : 0;
}
// If number of colors in the image is less than or equal to MAX_PALETTE_SIZE,
@ -220,7 +221,7 @@ static int GetHuffBitLengthsAndCodes(
}
// Create Huffman trees.
for (i = 0; i < histogram_image_size; ++i) {
for (i = 0; ok && (i < histogram_image_size); ++i) {
HuffmanTreeCode* const codes = &huffman_codes[5 * i];
VP8LHistogram* const histo = histogram_image->histograms[i];
ok = ok && VP8LCreateHuffmanTree(histo->literal_, 15, codes + 0);
@ -231,7 +232,11 @@ static int GetHuffBitLengthsAndCodes(
}
End:
if (!ok) free(mem_buf);
if (!ok) {
free(mem_buf);
// If one VP8LCreateHuffmanTree() above fails, we need to clean up behind.
memset(huffman_codes, 0, 5 * histogram_image_size * sizeof(*huffman_codes));
}
return ok;
}
@ -406,9 +411,10 @@ static int StoreHuffmanCode(VP8LBitWriter* const bw,
}
static void WriteHuffmanCode(VP8LBitWriter* const bw,
const HuffmanTreeCode* const code, int index) {
const int depth = code->code_lengths[index];
const int symbol = code->codes[index];
const HuffmanTreeCode* const code,
int code_index) {
const int depth = code->code_lengths[code_index];
const int symbol = code->codes[code_index];
VP8LWriteBits(bw, depth, symbol);
}
@ -557,6 +563,9 @@ static int EncodeImageInternal(VP8LBitWriter* const bw,
!GetHuffBitLengthsAndCodes(histogram_image, huffman_codes)) {
goto Error;
}
// Free combined histograms.
free(histogram_image);
histogram_image = NULL;
// Color Cache parameters.
VP8LWriteBits(bw, 1, use_color_cache);
@ -576,10 +585,10 @@ static int EncodeImageInternal(VP8LBitWriter* const bw,
uint32_t i;
if (histogram_argb == NULL) goto Error;
for (i = 0; i < histogram_image_xysize; ++i) {
const int index = histogram_symbols[i] & 0xffff;
histogram_argb[i] = 0xff000000 | (index << 8);
if (index >= max_index) {
max_index = index + 1;
const int symbol_index = histogram_symbols[i] & 0xffff;
histogram_argb[i] = 0xff000000 | (symbol_index << 8);
if (symbol_index >= max_index) {
max_index = symbol_index + 1;
}
}
histogram_image_size = max_index;
@ -603,9 +612,6 @@ static int EncodeImageInternal(VP8LBitWriter* const bw,
ClearHuffmanTreeIfOnlyOneSymbol(codes);
}
}
// Free combined histograms.
free(histogram_image);
histogram_image = NULL;
// Store actual literals.
StoreImageToBitMask(bw, width, histogram_bits, &refs,
@ -613,7 +619,7 @@ static int EncodeImageInternal(VP8LBitWriter* const bw,
ok = 1;
Error:
if (!ok) free(histogram_image);
free(histogram_image);
VP8LClearBackwardRefs(&refs);
if (huffman_codes != NULL) {
@ -711,13 +717,6 @@ static int ApplyCrossColorFilter(const VP8LEncoder* const enc,
// -----------------------------------------------------------------------------
// Stores 'val' into data[0..3], least-significant byte first (little-endian).
static void PutLE32(uint8_t* const data, uint32_t val) {
  int i;
  for (i = 0; i < 4; ++i) {
    data[i] = (val >> (8 * i)) & 0xff;
  }
}
static WebPEncodingError WriteRiffHeader(const WebPPicture* const pic,
size_t riff_size, size_t vp8l_size) {
uint8_t riff[RIFF_HEADER_SIZE + CHUNK_HEADER_SIZE + VP8L_SIGNATURE_SIZE] = {
@ -812,30 +811,24 @@ static WebPEncodingError AllocateTransformBuffer(VP8LEncoder* const enc,
return err;
}
// Bundles multiple (2, 4 or 8) pixels into a single pixel.
// Returns the new xsize.
static void BundleColorMap(const WebPPicture* const pic,
int xbits, uint32_t* bundled_argb, int xs) {
int y;
const int bit_depth = 1 << (3 - xbits);
uint32_t code = 0;
const uint32_t* argb = pic->argb;
const int width = pic->width;
const int height = pic->height;
for (y = 0; y < height; ++y) {
int x;
// Bundles multiple (1, 2, 4 or 8) pixels into a single pixel.
static void BundleColorMap(const uint8_t* const row, int width,
int xbits, uint32_t* const dst) {
int x;
if (xbits > 0) {
const int bit_depth = 1 << (3 - xbits);
const int mask = (1 << xbits) - 1;
uint32_t code = 0xff000000;
for (x = 0; x < width; ++x) {
const int mask = (1 << xbits) - 1;
const int xsub = x & mask;
if (xsub == 0) {
code = 0;
code = 0xff000000;
}
// TODO(vikasa): simplify the bundling logic.
code |= (argb[x] & 0xff00) << (bit_depth * xsub);
bundled_argb[y * xs + (x >> xbits)] = 0xff000000 | code;
code |= row[x] << (8 + bit_depth * xsub);
dst[x >> xbits] = code;
}
argb += pic->argb_stride;
} else {
for (x = 0; x < width; ++x) dst[x] = 0xff000000 | (row[x] << 8);
}
}
@ -847,24 +840,43 @@ static WebPEncodingError ApplyPalette(VP8LBitWriter* const bw,
WebPEncodingError err = VP8_ENC_OK;
int i, x, y;
const WebPPicture* const pic = enc->pic_;
uint32_t* argb = pic->argb;
uint32_t* src = pic->argb;
uint32_t* dst;
const int width = pic->width;
const int height = pic->height;
uint32_t* const palette = enc->palette_;
const int palette_size = enc->palette_size_;
uint8_t* row = NULL;
int xbits;
// Replace each input pixel by corresponding palette index.
// This is done line by line.
if (palette_size <= 4) {
xbits = (palette_size <= 2) ? 3 : 2;
} else {
xbits = (palette_size <= 16) ? 1 : 0;
}
err = AllocateTransformBuffer(enc, VP8LSubSampleSize(width, xbits), height);
if (err != VP8_ENC_OK) goto Error;
dst = enc->argb_;
row = WebPSafeMalloc((uint64_t)width, sizeof(*row));
if (row == NULL) return VP8_ENC_ERROR_OUT_OF_MEMORY;
for (y = 0; y < height; ++y) {
for (x = 0; x < width; ++x) {
const uint32_t pix = argb[x];
const uint32_t pix = src[x];
for (i = 0; i < palette_size; ++i) {
if (pix == palette[i]) {
argb[x] = 0xff000000u | (i << 8);
row[x] = i;
break;
}
}
}
argb += pic->argb_stride;
BundleColorMap(row, width, xbits, dst);
src += pic->argb_stride;
dst += enc->current_width_;
}
// Save palette to bitstream.
@ -880,20 +892,8 @@ static WebPEncodingError ApplyPalette(VP8LBitWriter* const bw,
goto Error;
}
if (palette_size <= 16) {
// Image can be packed (multiple pixels per uint32_t).
int xbits = 1;
if (palette_size <= 2) {
xbits = 3;
} else if (palette_size <= 4) {
xbits = 2;
}
err = AllocateTransformBuffer(enc, VP8LSubSampleSize(width, xbits), height);
if (err != VP8_ENC_OK) goto Error;
BundleColorMap(pic, xbits, enc->argb_, enc->current_width_);
}
Error:
free(row);
return err;
}

@ -93,34 +93,53 @@ static void ResetBoundaryPredictions(VP8Encoder* const enc) {
enc->nz_[-1] = 0; // constant
}
// Map configured quality level to coding tools used.
//-------------+---+---+---+---+---+---+
// Quality | 0 | 1 | 2 | 3 | 4 | 5 +
//-------------+---+---+---+---+---+---+
// dynamic prob| ~ | x | x | x | x | x |
//-------------+---+---+---+---+---+---+
// rd-opt modes| | | x | x | x | x |
//-------------+---+---+---+---+---+---+
// fast i4/i16 | x | x | | | | |
//-------------+---+---+---+---+---+---+
// rd-opt i4/16| | | x | x | x | x |
//-------------+---+---+---+---+---+---+
// Trellis | | x | | | x | x |
//-------------+---+---+---+---+---+---+
// full-SNS | | | | | | x |
//-------------+---+---+---+---+---+---+
// Mapping from config->method_ to coding tools used.
//-------------------+---+---+---+---+---+---+---+
// Method | 0 | 1 | 2 | 3 |(4)| 5 | 6 |
//-------------------+---+---+---+---+---+---+---+
// fast probe | x | | | x | | | |
//-------------------+---+---+---+---+---+---+---+
// dynamic proba | ~ | x | x | x | x | x | x |
//-------------------+---+---+---+---+---+---+---+
// fast mode analysis| | | | | x | x | x |
//-------------------+---+---+---+---+---+---+---+
// basic rd-opt | | | | x | x | x | x |
//-------------------+---+---+---+---+---+---+---+
// disto-score i4/16 | | | x | | | | |
//-------------------+---+---+---+---+---+---+---+
// rd-opt i4/16 | | | ~ | x | x | x | x |
//-------------------+---+---+---+---+---+---+---+
// token buffer (opt)| | | | x | x | x | x |
//-------------------+---+---+---+---+---+---+---+
// Trellis | | | | | | x |Ful|
//-------------------+---+---+---+---+---+---+---+
// full-SNS | | | | | x | x | x |
//-------------------+---+---+---+---+---+---+---+
static void MapConfigToTools(VP8Encoder* const enc) {
const int method = enc->config_->method;
const int limit = 100 - enc->config_->partition_limit;
const WebPConfig* const config = enc->config_;
const int method = config->method;
const int limit = 100 - config->partition_limit;
enc->method_ = method;
enc->rd_opt_level_ = (method >= 6) ? 3
: (method >= 5) ? 2
: (method >= 3) ? 1
: 0;
enc->rd_opt_level_ = (method >= 6) ? RD_OPT_TRELLIS_ALL
: (method >= 5) ? RD_OPT_TRELLIS
: (method >= 3) ? RD_OPT_BASIC
: RD_OPT_NONE;
enc->max_i4_header_bits_ =
256 * 16 * 16 * // upper bound: up to 16bit per 4x4 block
(limit * limit) / (100 * 100); // ... modulated with a quadratic curve.
enc->thread_level_ = config->thread_level;
enc->do_search_ = (config->target_size > 0 || config->target_PSNR > 0);
if (!config->low_memory) {
#if !defined(DISABLE_TOKEN_BUFFER)
enc->use_tokens_ = (method >= 3) && !enc->do_search_;
#endif
if (enc->use_tokens_) {
enc->num_parts_ = 1; // doesn't work with multi-partition
}
}
}
// Memory scaling with dimensions:
@ -259,17 +278,21 @@ static VP8Encoder* InitVP8Encoder(const WebPConfig* const config,
VP8EncInitLayer(enc);
#endif
VP8TBufferInit(&enc->tokens_);
return enc;
}
static void DeleteVP8Encoder(VP8Encoder* enc) {
static int DeleteVP8Encoder(VP8Encoder* enc) {
int ok = 1;
if (enc != NULL) {
VP8EncDeleteAlpha(enc);
ok = VP8EncDeleteAlpha(enc);
#ifdef WEBP_EXPERIMENTAL_FEATURES
VP8EncDeleteLayer(enc);
#endif
VP8TBufferClear(&enc->tokens_);
free(enc);
}
return ok;
}
//------------------------------------------------------------------------------
@ -332,7 +355,7 @@ int WebPReportProgress(const WebPPicture* const pic,
//------------------------------------------------------------------------------
int WebPEncode(const WebPConfig* config, WebPPicture* pic) {
int ok;
int ok = 0;
if (pic == NULL)
return 0;
@ -351,32 +374,38 @@ int WebPEncode(const WebPConfig* config, WebPPicture* pic) {
if (!config->lossless) {
VP8Encoder* enc = NULL;
if (pic->y == NULL || pic->u == NULL || pic->v == NULL) {
if (pic->argb != NULL) {
if (!WebPPictureARGBToYUVA(pic, WEBP_YUV420)) return 0;
} else {
return WebPEncodingSetError(pic, VP8_ENC_ERROR_NULL_PARAMETER);
}
// Make sure we have YUVA samples.
if (!WebPPictureARGBToYUVA(pic, WEBP_YUV420)) return 0;
}
enc = InitVP8Encoder(config, pic);
if (enc == NULL) return 0; // pic->error is already set.
// Note: each of the tasks below account for 20% in the progress report.
ok = VP8EncAnalyze(enc)
&& VP8StatLoop(enc)
&& VP8EncLoop(enc)
&& VP8EncFinishAlpha(enc)
ok = VP8EncAnalyze(enc);
// Analysis is done, proceed to actual coding.
ok = ok && VP8EncStartAlpha(enc); // possibly done in parallel
if (!enc->use_tokens_) {
ok = VP8EncLoop(enc);
} else {
ok = VP8EncTokenLoop(enc);
}
ok = ok && VP8EncFinishAlpha(enc);
#ifdef WEBP_EXPERIMENTAL_FEATURES
&& VP8EncFinishLayer(enc)
ok = ok && VP8EncFinishLayer(enc);
#endif
&& VP8EncWrite(enc);
ok = ok && VP8EncWrite(enc);
StoreStats(enc);
if (!ok) {
VP8EncFreeBitWriters(enc);
}
DeleteVP8Encoder(enc);
ok &= DeleteVP8Encoder(enc); // must always be called, even if !ok
} else {
if (pic->argb == NULL)
return WebPEncodingSetError(pic, VP8_ENC_ERROR_NULL_PARAMETER);
// Make sure we have ARGB samples.
if (pic->argb == NULL && !WebPPictureYUVAToARGB(pic)) {
return 0;
}
ok = VP8LEncodeImage(config, pic); // Sets pic->error in case of problem.
}

@ -12,6 +12,7 @@
#include <assert.h>
#include "./muxi.h"
#include "../utils/utils.h"
#if defined(__cplusplus) || defined(c_plusplus)
extern "C" {
@ -47,8 +48,9 @@ static void MuxRelease(WebPMux* const mux) {
MuxImageDeleteAll(&mux->images_);
DeleteAllChunks(&mux->vp8x_);
DeleteAllChunks(&mux->iccp_);
DeleteAllChunks(&mux->loop_);
DeleteAllChunks(&mux->meta_);
DeleteAllChunks(&mux->anim_);
DeleteAllChunks(&mux->exif_);
DeleteAllChunks(&mux->xmp_);
DeleteAllChunks(&mux->unknown_);
}
@ -81,13 +83,14 @@ static WebPMuxError MuxSet(WebPMux* const mux, CHUNK_INDEX idx, uint32_t nth,
ChunkInit(&chunk);
SWITCH_ID_LIST(IDX_VP8X, &mux->vp8x_);
SWITCH_ID_LIST(IDX_ICCP, &mux->iccp_);
SWITCH_ID_LIST(IDX_LOOP, &mux->loop_);
SWITCH_ID_LIST(IDX_META, &mux->meta_);
if (idx == IDX_UNKNOWN && data->size_ > TAG_SIZE) {
SWITCH_ID_LIST(IDX_ANIM, &mux->anim_);
SWITCH_ID_LIST(IDX_EXIF, &mux->exif_);
SWITCH_ID_LIST(IDX_XMP, &mux->xmp_);
if (idx == IDX_UNKNOWN && data->size > TAG_SIZE) {
// For raw-data unknown chunk, the first four bytes should be the tag to be
// used for the chunk.
const WebPData tmp = { data->bytes_ + TAG_SIZE, data->size_ - TAG_SIZE };
err = ChunkAssignData(&chunk, &tmp, copy_data, GetLE32(data->bytes_ + 0));
const WebPData tmp = { data->bytes + TAG_SIZE, data->size - TAG_SIZE };
err = ChunkAssignData(&chunk, &tmp, copy_data, GetLE32(data->bytes + 0));
if (err == WEBP_MUX_OK)
err = ChunkSetNth(&chunk, &mux->unknown_, nth);
}
@ -106,39 +109,40 @@ static WebPMuxError MuxAddChunk(WebPMux* const mux, uint32_t nth, uint32_t tag,
return MuxSet(mux, idx, nth, &chunk_data, copy_data);
}
// Create data for frame/tile given image data, offsets and duration.
static WebPMuxError CreateFrameTileData(const WebPData* const image,
int x_offset, int y_offset,
int duration, int is_lossless,
int is_frame,
WebPData* const frame_tile) {
// Create data for frame/fragment given image data, offsets and duration.
static WebPMuxError CreateFrameFragmentData(
const WebPData* const image, int x_offset, int y_offset, int duration,
WebPMuxAnimDispose dispose_method, int is_lossless, int is_frame,
WebPData* const frame_frgm) {
int width;
int height;
uint8_t* frame_tile_bytes;
const size_t frame_tile_size = kChunks[is_frame ? IDX_FRAME : IDX_TILE].size;
uint8_t* frame_frgm_bytes;
const size_t frame_frgm_size = kChunks[is_frame ? IDX_ANMF : IDX_FRGM].size;
const int ok = is_lossless ?
VP8LGetInfo(image->bytes_, image->size_, &width, &height, NULL) :
VP8GetInfo(image->bytes_, image->size_, image->size_, &width, &height);
VP8LGetInfo(image->bytes, image->size, &width, &height, NULL) :
VP8GetInfo(image->bytes, image->size, image->size, &width, &height);
if (!ok) return WEBP_MUX_INVALID_ARGUMENT;
assert(width > 0 && height > 0 && duration > 0);
assert(width > 0 && height > 0 && duration >= 0);
assert(dispose_method == (dispose_method & 1));
// Note: assertion on upper bounds is done in PutLE24().
frame_tile_bytes = (uint8_t*)malloc(frame_tile_size);
if (frame_tile_bytes == NULL) return WEBP_MUX_MEMORY_ERROR;
frame_frgm_bytes = (uint8_t*)malloc(frame_frgm_size);
if (frame_frgm_bytes == NULL) return WEBP_MUX_MEMORY_ERROR;
PutLE24(frame_tile_bytes + 0, x_offset / 2);
PutLE24(frame_tile_bytes + 3, y_offset / 2);
PutLE24(frame_frgm_bytes + 0, x_offset / 2);
PutLE24(frame_frgm_bytes + 3, y_offset / 2);
if (is_frame) {
PutLE24(frame_tile_bytes + 6, width - 1);
PutLE24(frame_tile_bytes + 9, height - 1);
PutLE24(frame_tile_bytes + 12, duration - 1);
PutLE24(frame_frgm_bytes + 6, width - 1);
PutLE24(frame_frgm_bytes + 9, height - 1);
PutLE24(frame_frgm_bytes + 12, duration);
frame_frgm_bytes[15] = (dispose_method & 1);
}
frame_tile->bytes_ = frame_tile_bytes;
frame_tile->size_ = frame_tile_size;
frame_frgm->bytes = frame_frgm_bytes;
frame_frgm->size = frame_frgm_size;
return WEBP_MUX_OK;
}
@ -149,8 +153,8 @@ static WebPMuxError GetImageData(const WebPData* const bitstream,
WebPData* const image, WebPData* const alpha,
int* const is_lossless) {
WebPDataInit(alpha); // Default: no alpha.
if (bitstream->size_ < TAG_SIZE ||
memcmp(bitstream->bytes_, "RIFF", TAG_SIZE)) {
if (bitstream->size < TAG_SIZE ||
memcmp(bitstream->bytes, "RIFF", TAG_SIZE)) {
// It is NOT webp file data. Return input data as is.
*image = *bitstream;
} else {
@ -166,7 +170,7 @@ static WebPMuxError GetImageData(const WebPData* const bitstream,
}
WebPMuxDelete(mux);
}
*is_lossless = VP8LCheckSignature(image->bytes_, image->size_);
*is_lossless = VP8LCheckSignature(image->bytes, image->size);
return WEBP_MUX_OK;
}
@ -185,204 +189,173 @@ static WebPMuxError DeleteChunks(WebPChunk** chunk_list, uint32_t tag) {
return err;
}
static WebPMuxError MuxDeleteAllNamedData(WebPMux* const mux, CHUNK_INDEX idx) {
const WebPChunkId id = kChunks[idx].id;
static WebPMuxError MuxDeleteAllNamedData(WebPMux* const mux, uint32_t tag) {
const WebPChunkId id = ChunkGetIdFromTag(tag);
WebPChunk** chunk_list;
if (mux == NULL) return WEBP_MUX_INVALID_ARGUMENT;
assert(mux != NULL);
if (IsWPI(id)) return WEBP_MUX_INVALID_ARGUMENT;
chunk_list = MuxGetChunkListFromId(mux, id);
if (chunk_list == NULL) return WEBP_MUX_INVALID_ARGUMENT;
return DeleteChunks(chunk_list, kChunks[idx].tag);
}
static WebPMuxError DeleteLoopCount(WebPMux* const mux) {
return MuxDeleteAllNamedData(mux, IDX_LOOP);
return DeleteChunks(chunk_list, tag);
}
//------------------------------------------------------------------------------
// Set API(s).
WebPMuxError WebPMuxSetImage(WebPMux* mux,
const WebPData* bitstream, int copy_data) {
WebPMuxError WebPMuxSetChunk(WebPMux* mux, const char fourcc[4],
const WebPData* chunk_data, int copy_data) {
CHUNK_INDEX idx;
uint32_t tag;
WebPMuxError err;
WebPChunk chunk;
WebPMuxImage wpi;
WebPData image;
WebPData alpha;
int is_lossless;
int image_tag;
if (mux == NULL || bitstream == NULL || bitstream->bytes_ == NULL ||
bitstream->size_ > MAX_CHUNK_PAYLOAD) {
if (mux == NULL || fourcc == NULL || chunk_data == NULL ||
chunk_data->bytes == NULL || chunk_data->size > MAX_CHUNK_PAYLOAD) {
return WEBP_MUX_INVALID_ARGUMENT;
}
idx = ChunkGetIndexFromFourCC(fourcc);
tag = ChunkGetTagFromFourCC(fourcc);
// If given data is for a whole webp file,
// extract only the VP8/VP8L data from it.
err = GetImageData(bitstream, &image, &alpha, &is_lossless);
if (err != WEBP_MUX_OK) return err;
image_tag = is_lossless ? kChunks[IDX_VP8L].tag : kChunks[IDX_VP8].tag;
// Delete the existing images.
MuxImageDeleteAll(&mux->images_);
MuxImageInit(&wpi);
// Delete existing chunk(s) with the same 'fourcc'.
err = MuxDeleteAllNamedData(mux, tag);
if (err != WEBP_MUX_OK && err != WEBP_MUX_NOT_FOUND) return err;
if (alpha.bytes_ != NULL) { // Add alpha chunk.
ChunkInit(&chunk);
err = ChunkAssignData(&chunk, &alpha, copy_data, kChunks[IDX_ALPHA].tag);
if (err != WEBP_MUX_OK) goto Err;
err = ChunkSetNth(&chunk, &wpi.alpha_, 1);
if (err != WEBP_MUX_OK) goto Err;
}
// Add the given chunk.
return MuxSet(mux, idx, 1, chunk_data, copy_data);
}
// Add image chunk.
// Creates a chunk from given 'data' and sets it as 1st chunk in 'chunk_list'.
static WebPMuxError AddDataToChunkList(
const WebPData* const data, int copy_data, uint32_t tag,
WebPChunk** chunk_list) {
WebPChunk chunk;
WebPMuxError err;
ChunkInit(&chunk);
err = ChunkAssignData(&chunk, &image, copy_data, image_tag);
if (err != WEBP_MUX_OK) goto Err;
err = ChunkSetNth(&chunk, &wpi.img_, 1);
err = ChunkAssignData(&chunk, data, copy_data, tag);
if (err != WEBP_MUX_OK) goto Err;
// Add this image to mux.
err = MuxImagePush(&wpi, &mux->images_);
err = ChunkSetNth(&chunk, chunk_list, 1);
if (err != WEBP_MUX_OK) goto Err;
// All OK.
return WEBP_MUX_OK;
Err:
// Something bad happened.
ChunkRelease(&chunk);
MuxImageRelease(&wpi);
return err;
}
WebPMuxError WebPMuxSetMetadata(WebPMux* mux, const WebPData* metadata,
int copy_data) {
WebPMuxError err;
if (mux == NULL || metadata == NULL || metadata->bytes_ == NULL ||
metadata->size_ > MAX_CHUNK_PAYLOAD) {
return WEBP_MUX_INVALID_ARGUMENT;
// Extracts image & alpha data from the given bitstream and then sets wpi.alpha_
// and wpi.img_ appropriately.
static WebPMuxError SetAlphaAndImageChunks(
const WebPData* const bitstream, int copy_data, WebPMuxImage* const wpi) {
int is_lossless = 0;
WebPData image, alpha;
WebPMuxError err = GetImageData(bitstream, &image, &alpha, &is_lossless);
const int image_tag =
is_lossless ? kChunks[IDX_VP8L].tag : kChunks[IDX_VP8].tag;
if (err != WEBP_MUX_OK) return err;
if (alpha.bytes != NULL) {
err = AddDataToChunkList(&alpha, copy_data, kChunks[IDX_ALPHA].tag,
&wpi->alpha_);
if (err != WEBP_MUX_OK) return err;
}
// Delete the existing metadata chunk(s).
err = WebPMuxDeleteMetadata(mux);
if (err != WEBP_MUX_OK && err != WEBP_MUX_NOT_FOUND) return err;
// Add the given metadata chunk.
return MuxSet(mux, IDX_META, 1, metadata, copy_data);
return AddDataToChunkList(&image, copy_data, image_tag, &wpi->img_);
}
WebPMuxError WebPMuxSetColorProfile(WebPMux* mux, const WebPData* color_profile,
int copy_data) {
WebPMuxError WebPMuxSetImage(WebPMux* mux, const WebPData* bitstream,
int copy_data) {
WebPMuxImage wpi;
WebPMuxError err;
if (mux == NULL || color_profile == NULL || color_profile->bytes_ == NULL ||
color_profile->size_ > MAX_CHUNK_PAYLOAD) {
// Sanity checks.
if (mux == NULL || bitstream == NULL || bitstream->bytes == NULL ||
bitstream->size > MAX_CHUNK_PAYLOAD) {
return WEBP_MUX_INVALID_ARGUMENT;
}
// Delete the existing ICCP chunk(s).
err = WebPMuxDeleteColorProfile(mux);
if (err != WEBP_MUX_OK && err != WEBP_MUX_NOT_FOUND) return err;
// Add the given ICCP chunk.
return MuxSet(mux, IDX_ICCP, 1, color_profile, copy_data);
}
WebPMuxError WebPMuxSetLoopCount(WebPMux* mux, int loop_count) {
WebPMuxError err;
uint8_t* data = NULL;
if (mux->images_ != NULL) {
// Only one 'simple image' can be added in mux. So, remove present images.
MuxImageDeleteAll(&mux->images_);
}
if (mux == NULL) return WEBP_MUX_INVALID_ARGUMENT;
if (loop_count >= MAX_LOOP_COUNT) return WEBP_MUX_INVALID_ARGUMENT;
MuxImageInit(&wpi);
err = SetAlphaAndImageChunks(bitstream, copy_data, &wpi);
if (err != WEBP_MUX_OK) goto Err;
// Delete the existing LOOP chunk(s).
err = DeleteLoopCount(mux);
if (err != WEBP_MUX_OK && err != WEBP_MUX_NOT_FOUND) return err;
// Add this WebPMuxImage to mux.
err = MuxImagePush(&wpi, &mux->images_);
if (err != WEBP_MUX_OK) goto Err;
// Add the given loop count.
data = (uint8_t*)malloc(kChunks[IDX_LOOP].size);
if (data == NULL) return WEBP_MUX_MEMORY_ERROR;
// All is well.
return WEBP_MUX_OK;
PutLE16(data, loop_count);
err = MuxAddChunk(mux, 1, kChunks[IDX_LOOP].tag, data,
kChunks[IDX_LOOP].size, 1);
free(data);
Err: // Something bad happened.
MuxImageRelease(&wpi);
return err;
}
static WebPMuxError MuxPushFrameTileInternal(
WebPMux* const mux, const WebPData* const bitstream, int x_offset,
int y_offset, int duration, int copy_data, uint32_t tag) {
WebPChunk chunk;
WebPData image;
WebPData alpha;
WebPMuxError WebPMuxPushFrame(WebPMux* mux, const WebPMuxFrameInfo* frame,
int copy_data) {
WebPMuxImage wpi;
WebPMuxError err;
WebPData frame_tile;
const int is_frame = (tag == kChunks[IDX_FRAME].tag) ? 1 : 0;
int is_lossless;
int image_tag;
int is_frame;
const WebPData* const bitstream = &frame->bitstream;
// Sanity checks.
if (mux == NULL || bitstream == NULL || bitstream->bytes_ == NULL ||
bitstream->size_ > MAX_CHUNK_PAYLOAD) {
if (mux == NULL || frame == NULL) return WEBP_MUX_INVALID_ARGUMENT;
is_frame = (frame->id == WEBP_CHUNK_ANMF);
if (!(is_frame || (frame->id == WEBP_CHUNK_FRGM))) {
return WEBP_MUX_INVALID_ARGUMENT;
}
if (x_offset < 0 || x_offset >= MAX_POSITION_OFFSET ||
y_offset < 0 || y_offset >= MAX_POSITION_OFFSET ||
duration <= 0 || duration > MAX_DURATION) {
#ifndef WEBP_EXPERIMENTAL_FEATURES
if (frame->id == WEBP_CHUNK_FRGM) { // disabled for now.
return WEBP_MUX_INVALID_ARGUMENT;
}
#endif
// Snap offsets to even positions.
x_offset &= ~1;
y_offset &= ~1;
if (bitstream->bytes == NULL || bitstream->size > MAX_CHUNK_PAYLOAD) {
return WEBP_MUX_INVALID_ARGUMENT;
}
// If given data is for a whole webp file,
// extract only the VP8/VP8L data from it.
err = GetImageData(bitstream, &image, &alpha, &is_lossless);
if (err != WEBP_MUX_OK) return err;
image_tag = is_lossless ? kChunks[IDX_VP8L].tag : kChunks[IDX_VP8].tag;
if (mux->images_ != NULL) {
const WebPMuxImage* const image = mux->images_;
const uint32_t image_id = (image->header_ != NULL) ?
ChunkGetIdFromTag(image->header_->tag_) : WEBP_CHUNK_IMAGE;
if (image_id != frame->id) {
return WEBP_MUX_INVALID_ARGUMENT; // Conflicting frame types.
}
}
WebPDataInit(&frame_tile);
ChunkInit(&chunk);
MuxImageInit(&wpi);
if (alpha.bytes_ != NULL) {
// Add alpha chunk.
err = ChunkAssignData(&chunk, &alpha, copy_data, kChunks[IDX_ALPHA].tag);
err = SetAlphaAndImageChunks(bitstream, copy_data, &wpi);
if (err != WEBP_MUX_OK) goto Err;
assert(wpi.img_ != NULL); // As SetAlphaAndImageChunks() was successful.
{
const int is_lossless = (wpi.img_->tag_ == kChunks[IDX_VP8L].tag);
const int x_offset = frame->x_offset & ~1; // Snap offsets to even.
const int y_offset = frame->y_offset & ~1;
const int duration = is_frame ? frame->duration : 1 /* unused */;
const WebPMuxAnimDispose dispose_method =
is_frame ? frame->dispose_method : 0 /* unused */;
const uint32_t tag = kChunks[is_frame ? IDX_ANMF : IDX_FRGM].tag;
WebPData frame_frgm;
if (x_offset < 0 || x_offset >= MAX_POSITION_OFFSET ||
y_offset < 0 || y_offset >= MAX_POSITION_OFFSET ||
(duration < 0 || duration >= MAX_DURATION) ||
dispose_method != (dispose_method & 1)) {
err = WEBP_MUX_INVALID_ARGUMENT;
goto Err;
}
err = CreateFrameFragmentData(&wpi.img_->data_, x_offset, y_offset,
duration, dispose_method, is_lossless,
is_frame, &frame_frgm);
if (err != WEBP_MUX_OK) goto Err;
err = ChunkSetNth(&chunk, &wpi.alpha_, 1);
// Add frame/fragment chunk (with copy_data = 1).
err = AddDataToChunkList(&frame_frgm, 1, tag, &wpi.header_);
WebPDataClear(&frame_frgm); // frame_frgm owned by wpi.header_ now.
if (err != WEBP_MUX_OK) goto Err;
ChunkInit(&chunk); // chunk owned by wpi.alpha_ now.
}
// Add image chunk.
err = ChunkAssignData(&chunk, &image, copy_data, image_tag);
if (err != WEBP_MUX_OK) goto Err;
err = ChunkSetNth(&chunk, &wpi.img_, 1);
if (err != WEBP_MUX_OK) goto Err;
ChunkInit(&chunk); // chunk owned by wpi.img_ now.
// Create frame/tile data.
err = CreateFrameTileData(&image, x_offset, y_offset, duration, is_lossless,
is_frame, &frame_tile);
if (err != WEBP_MUX_OK) goto Err;
// Add frame/tile chunk (with copy_data = 1).
err = ChunkAssignData(&chunk, &frame_tile, 1, tag);
if (err != WEBP_MUX_OK) goto Err;
WebPDataClear(&frame_tile);
err = ChunkSetNth(&chunk, &wpi.header_, 1);
if (err != WEBP_MUX_OK) goto Err;
ChunkInit(&chunk); // chunk owned by wpi.header_ now.
// Add this WebPMuxImage to mux.
err = MuxImagePush(&wpi, &mux->images_);
if (err != WEBP_MUX_OK) goto Err;
@ -391,86 +364,61 @@ static WebPMuxError MuxPushFrameTileInternal(
return WEBP_MUX_OK;
Err: // Something bad happened.
WebPDataClear(&frame_tile);
ChunkRelease(&chunk);
MuxImageRelease(&wpi);
return err;
}
WebPMuxError WebPMuxPushFrame(WebPMux* mux, const WebPData* bitstream,
int x_offset, int y_offset,
int duration, int copy_data) {
return MuxPushFrameTileInternal(mux, bitstream, x_offset, y_offset,
duration, copy_data, kChunks[IDX_FRAME].tag);
}
WebPMuxError WebPMuxPushTile(WebPMux* mux, const WebPData* bitstream,
int x_offset, int y_offset,
int copy_data) {
return MuxPushFrameTileInternal(mux, bitstream, x_offset, y_offset,
1 /* unused duration */, copy_data,
kChunks[IDX_TILE].tag);
}
//------------------------------------------------------------------------------
// Delete API(s).
WebPMuxError WebPMuxDeleteImage(WebPMux* mux) {
WebPMuxError WebPMuxSetAnimationParams(WebPMux* mux,
const WebPMuxAnimParams* params) {
WebPMuxError err;
uint8_t data[ANIM_CHUNK_SIZE];
if (mux == NULL) return WEBP_MUX_INVALID_ARGUMENT;
err = MuxValidateForImage(mux);
if (err != WEBP_MUX_OK) return err;
// All well, delete image.
MuxImageDeleteAll(&mux->images_);
return WEBP_MUX_OK;
}
if (mux == NULL || params == NULL) return WEBP_MUX_INVALID_ARGUMENT;
if (params->loop_count < 0 || params->loop_count >= MAX_LOOP_COUNT) {
return WEBP_MUX_INVALID_ARGUMENT;
}
WebPMuxError WebPMuxDeleteMetadata(WebPMux* mux) {
return MuxDeleteAllNamedData(mux, IDX_META);
}
// Delete any existing ANIM chunk(s).
err = MuxDeleteAllNamedData(mux, kChunks[IDX_ANIM].tag);
if (err != WEBP_MUX_OK && err != WEBP_MUX_NOT_FOUND) return err;
WebPMuxError WebPMuxDeleteColorProfile(WebPMux* mux) {
return MuxDeleteAllNamedData(mux, IDX_ICCP);
// Set the animation parameters.
PutLE32(data, params->bgcolor);
PutLE16(data + 4, params->loop_count);
return MuxAddChunk(mux, 1, kChunks[IDX_ANIM].tag, data, sizeof(data), 1);
}
static WebPMuxError DeleteFrameTileInternal(WebPMux* const mux, uint32_t nth,
CHUNK_INDEX idx) {
const WebPChunkId id = kChunks[idx].id;
if (mux == NULL) return WEBP_MUX_INVALID_ARGUMENT;
//------------------------------------------------------------------------------
// Delete API(s).
assert(idx == IDX_FRAME || idx == IDX_TILE);
return MuxImageDeleteNth(&mux->images_, nth, id);
// Deletes the chunk(s) of 'mux' identified by 'fourcc'.
// Returns WEBP_MUX_INVALID_ARGUMENT when 'mux' or 'fourcc' is NULL; otherwise
// forwards the result of MuxDeleteAllNamedData() for the corresponding tag.
WebPMuxError WebPMuxDeleteChunk(WebPMux* mux, const char fourcc[4]) {
  if (mux != NULL && fourcc != NULL) {
    const uint32_t tag = ChunkGetTagFromFourCC(fourcc);
    return MuxDeleteAllNamedData(mux, tag);
  }
  return WEBP_MUX_INVALID_ARGUMENT;
}
WebPMuxError WebPMuxDeleteFrame(WebPMux* mux, uint32_t nth) {
return DeleteFrameTileInternal(mux, nth, IDX_FRAME);
}
WebPMuxError WebPMuxDeleteTile(WebPMux* mux, uint32_t nth) {
return DeleteFrameTileInternal(mux, nth, IDX_TILE);
if (mux == NULL) return WEBP_MUX_INVALID_ARGUMENT;
return MuxImageDeleteNth(&mux->images_, nth);
}
//------------------------------------------------------------------------------
// Assembly of the WebP RIFF file.
static WebPMuxError GetFrameTileInfo(const WebPChunk* const frame_tile_chunk,
int* const x_offset, int* const y_offset,
int* const duration) {
const uint32_t tag = frame_tile_chunk->tag_;
const int is_frame = (tag == kChunks[IDX_FRAME].tag);
const WebPData* const data = &frame_tile_chunk->data_;
static WebPMuxError GetFrameFragmentInfo(
const WebPChunk* const frame_frgm_chunk,
int* const x_offset, int* const y_offset, int* const duration) {
const uint32_t tag = frame_frgm_chunk->tag_;
const int is_frame = (tag == kChunks[IDX_ANMF].tag);
const WebPData* const data = &frame_frgm_chunk->data_;
const size_t expected_data_size =
is_frame ? FRAME_CHUNK_SIZE : TILE_CHUNK_SIZE;
assert(frame_tile_chunk != NULL);
assert(tag == kChunks[IDX_FRAME].tag || tag == kChunks[IDX_TILE].tag);
if (data->size_ != expected_data_size) return WEBP_MUX_INVALID_ARGUMENT;
*x_offset = 2 * GetLE24(data->bytes_ + 0);
*y_offset = 2 * GetLE24(data->bytes_ + 3);
if (is_frame) *duration = 1 + GetLE24(data->bytes_ + 12);
is_frame ? ANMF_CHUNK_SIZE : FRGM_CHUNK_SIZE;
assert(frame_frgm_chunk != NULL);
assert(tag == kChunks[IDX_ANMF].tag || tag == kChunks[IDX_FRGM].tag);
if (data->size != expected_data_size) return WEBP_MUX_INVALID_ARGUMENT;
*x_offset = 2 * GetLE24(data->bytes + 0);
*y_offset = 2 * GetLE24(data->bytes + 3);
if (is_frame) *duration = GetLE24(data->bytes + 12);
return WEBP_MUX_OK;
}
@ -483,8 +431,8 @@ WebPMuxError MuxGetImageWidthHeight(const WebPChunk* const image_chunk,
assert(image_chunk != NULL);
assert(tag == kChunks[IDX_VP8].tag || tag == kChunks[IDX_VP8L].tag);
ok = (tag == kChunks[IDX_VP8].tag) ?
VP8GetInfo(data->bytes_, data->size_, data->size_, &w, &h) :
VP8LGetInfo(data->bytes_, data->size_, &w, &h, NULL);
VP8GetInfo(data->bytes, data->size, data->size, &w, &h) :
VP8LGetInfo(data->bytes, data->size, &w, &h, NULL);
if (ok) {
*width = w;
*height = h;
@ -499,11 +447,11 @@ static WebPMuxError GetImageInfo(const WebPMuxImage* const wpi,
int* const duration,
int* const width, int* const height) {
const WebPChunk* const image_chunk = wpi->img_;
const WebPChunk* const frame_tile_chunk = wpi->header_;
const WebPChunk* const frame_frgm_chunk = wpi->header_;
// Get offsets and duration from FRM/TILE chunk.
// Get offsets and duration from ANMF/FRGM chunk.
const WebPMuxError err =
GetFrameTileInfo(frame_tile_chunk, x_offset, y_offset, duration);
GetFrameFragmentInfo(frame_frgm_chunk, x_offset, y_offset, duration);
if (err != WEBP_MUX_OK) return err;
// Get width and height from VP8/VP8L chunk.
@ -525,7 +473,7 @@ static WebPMuxError GetImageCanvasWidthHeight(
int max_x = 0;
int max_y = 0;
int64_t image_area = 0;
// Aggregate the bounding box for animation frames & tiled images.
// Aggregate the bounding box for animation frames & fragmented images.
for (; wpi != NULL; wpi = wpi->next_) {
int x_offset, y_offset, duration, w, h;
const WebPMuxError err = GetImageInfo(wpi, &x_offset, &y_offset,
@ -542,11 +490,11 @@ static WebPMuxError GetImageCanvasWidthHeight(
}
*width = max_x;
*height = max_y;
// Crude check to validate that there are no image overlaps/holes for tile
// images. Check that the aggregated image area for individual tiles exactly
// matches the image area of the constructed canvas. However, the area-match
// is necessary but not sufficient condition.
if ((flags & TILE_FLAG) && (image_area != (max_x * max_y))) {
// Crude check to validate that there are no image overlaps/holes for
// fragmented images. Check that the aggregated image area for individual
// fragments exactly matches the image area of the constructed canvas.
// However, the area match is a necessary but not a sufficient condition.
if ((flags & FRAGMENTS_FLAG) && (image_area != (max_x * max_y))) {
*width = 0;
*height = 0;
return WEBP_MUX_INVALID_ARGUMENT;
@ -580,34 +528,34 @@ static WebPMuxError CreateVP8XChunk(WebPMux* const mux) {
assert(mux != NULL);
images = mux->images_; // First image.
if (images == NULL || images->img_ == NULL ||
images->img_->data_.bytes_ == NULL) {
images->img_->data_.bytes == NULL) {
return WEBP_MUX_INVALID_ARGUMENT;
}
// If VP8X chunk(s) is(are) already present, remove them (and later add new
// VP8X chunk with updated flags).
err = MuxDeleteAllNamedData(mux, IDX_VP8X);
err = MuxDeleteAllNamedData(mux, kChunks[IDX_VP8X].tag);
if (err != WEBP_MUX_OK && err != WEBP_MUX_NOT_FOUND) return err;
// Set flags.
if (mux->iccp_ != NULL && mux->iccp_->data_.bytes_ != NULL) {
if (mux->iccp_ != NULL && mux->iccp_->data_.bytes != NULL) {
flags |= ICCP_FLAG;
}
if (mux->meta_ != NULL && mux->meta_->data_.bytes_ != NULL) {
flags |= META_FLAG;
if (mux->exif_ != NULL && mux->exif_->data_.bytes != NULL) {
flags |= EXIF_FLAG;
}
if (mux->xmp_ != NULL && mux->xmp_->data_.bytes != NULL) {
flags |= XMP_FLAG;
}
if (images->header_ != NULL) {
if (images->header_->tag_ == kChunks[IDX_TILE].tag) {
// This is a tiled image.
flags |= TILE_FLAG;
} else if (images->header_->tag_ == kChunks[IDX_FRAME].tag) {
if (images->header_->tag_ == kChunks[IDX_FRGM].tag) {
// This is a fragmented image.
flags |= FRAGMENTS_FLAG;
} else if (images->header_->tag_ == kChunks[IDX_ANMF].tag) {
// This is an image with animation.
flags |= ANIMATION_FLAG;
}
}
if (MuxImageCount(images, WEBP_CHUNK_ALPHA) > 0) {
flags |= ALPHA_FLAG; // Some images have an alpha channel.
}
@ -643,39 +591,63 @@ static WebPMuxError CreateVP8XChunk(WebPMux* const mux) {
return err;
}
// Cleans up 'mux' by removing any unnecessary chunks:
//  - when exactly one ANMF frame or exactly one FRGM fragment is present, the
//    ANMF/FRGM header of the first image is dropped so that the image is
//    written as a plain (non-animated, non-fragmented) one;
//  - when no frame remains, any ANIM chunk is deleted.
// Returns WEBP_MUX_OK on success, or the first error reported by a helper.
static WebPMuxError MuxCleanup(WebPMux* const mux) {
  int num_frames;
  int num_fragments;
  int num_anim_chunks;
  WebPMuxError err;

  err = WebPMuxNumChunks(mux, kChunks[IDX_ANMF].id, &num_frames);
  if (err != WEBP_MUX_OK) return err;
  err = WebPMuxNumChunks(mux, kChunks[IDX_FRGM].id, &num_fragments);
  if (err != WEBP_MUX_OK) return err;

  // A lone frame/fragment does not need its ANMF/FRGM header.
  if (num_frames == 1 || num_fragments == 1) {
    WebPMuxImage* first_image;
    err = MuxImageGetNth((const WebPMuxImage**)&mux->images_, 1, &first_image);
    assert(err == WEBP_MUX_OK);  // We know that one frame/fragment does exist.
    if (first_image->header_ != NULL) {
      assert(first_image->header_->tag_ == kChunks[IDX_ANMF].tag ||
             first_image->header_->tag_ == kChunks[IDX_FRGM].tag);
      ChunkDelete(first_image->header_);  // Removes ANMF/FRGM chunk.
      first_image->header_ = NULL;
    }
    // From here on the image counts as non-animated and non-fragmented.
    num_frames = 0;
    num_fragments = 0;
  }

  // Remove ANIM chunk if this is a non-animated image.
  err = WebPMuxNumChunks(mux, kChunks[IDX_ANIM].id, &num_anim_chunks);
  if (err != WEBP_MUX_OK) return err;
  if (num_anim_chunks < 1 || num_frames != 0) return WEBP_MUX_OK;
  return MuxDeleteAllNamedData(mux, kChunks[IDX_ANIM].tag);
}
WebPMuxError WebPMuxAssemble(WebPMux* mux, WebPData* assembled_data) {
size_t size = 0;
uint8_t* data = NULL;
uint8_t* dst = NULL;
int num_frames;
int num_loop_chunks;
WebPMuxError err;
if (mux == NULL || assembled_data == NULL) {
return WEBP_MUX_INVALID_ARGUMENT;
}
// Remove LOOP chunk if unnecessary.
err = WebPMuxNumChunks(mux, kChunks[IDX_LOOP].id, &num_loop_chunks);
// Finalize mux.
err = MuxCleanup(mux);
if (err != WEBP_MUX_OK) return err;
if (num_loop_chunks >= 1) {
err = WebPMuxNumChunks(mux, kChunks[IDX_FRAME].id, &num_frames);
if (err != WEBP_MUX_OK) return err;
if (num_frames == 0) {
err = DeleteLoopCount(mux);
if (err != WEBP_MUX_OK) return err;
}
}
// Create VP8X chunk.
err = CreateVP8XChunk(mux);
if (err != WEBP_MUX_OK) return err;
// Allocate data.
size = ChunksListDiskSize(mux->vp8x_) + ChunksListDiskSize(mux->iccp_)
+ ChunksListDiskSize(mux->loop_) + MuxImageListDiskSize(mux->images_)
+ ChunksListDiskSize(mux->meta_) + ChunksListDiskSize(mux->unknown_)
+ RIFF_HEADER_SIZE;
+ ChunksListDiskSize(mux->anim_) + MuxImageListDiskSize(mux->images_)
+ ChunksListDiskSize(mux->exif_) + ChunksListDiskSize(mux->xmp_)
+ ChunksListDiskSize(mux->unknown_) + RIFF_HEADER_SIZE;
data = (uint8_t*)malloc(size);
if (data == NULL) return WEBP_MUX_MEMORY_ERROR;
@ -684,9 +656,10 @@ WebPMuxError WebPMuxAssemble(WebPMux* mux, WebPData* assembled_data) {
dst = MuxEmitRiffHeader(data, size);
dst = ChunkListEmit(mux->vp8x_, dst);
dst = ChunkListEmit(mux->iccp_, dst);
dst = ChunkListEmit(mux->loop_, dst);
dst = ChunkListEmit(mux->anim_, dst);
dst = MuxImageListEmit(mux->images_, dst);
dst = ChunkListEmit(mux->meta_, dst);
dst = ChunkListEmit(mux->exif_, dst);
dst = ChunkListEmit(mux->xmp_, dst);
dst = ChunkListEmit(mux->unknown_, dst);
assert(dst == data + size);
@ -698,9 +671,9 @@ WebPMuxError WebPMuxAssemble(WebPMux* mux, WebPData* assembled_data) {
size = 0;
}
// Finalize.
assembled_data->bytes_ = data;
assembled_data->size_ = size;
// Finalize data.
assembled_data->bytes = data;
assembled_data->size = size;
return err;
}

@ -15,7 +15,6 @@
#include <stdlib.h>
#include "../dec/vp8i.h"
#include "../dec/vp8li.h"
#include "../webp/format_constants.h"
#include "../webp/mux.h"
#if defined(__cplusplus) || defined(c_plusplus)
@ -25,22 +24,26 @@ extern "C" {
//------------------------------------------------------------------------------
// Defines and constants.
#define MUX_MAJ_VERSION 0
#define MUX_MIN_VERSION 1
#define MUX_REV_VERSION 0
// Chunk object: one node of a chunk list (linked through 'next_').
typedef struct WebPChunk WebPChunk;
struct WebPChunk {
uint32_t tag_;  // Chunk tag; compared against kChunks[...].tag.
int owner_; // True if *data_ memory is owned internally.
// VP8X, ANIM, and other internally created chunks
// like ANMF/FRGM are always owned.
WebPData data_;  // Chunk data ('bytes' pointer and 'size').
WebPChunk* next_;  // Next chunk in the list.
};
// MuxImage object. Store a full webp image (including frame/tile chunk, alpha
// MuxImage object. Stores a full WebP image (including ANMF/FRGM chunk, ALPH
// chunk and VP8/VP8L chunk).
typedef struct WebPMuxImage WebPMuxImage;
struct WebPMuxImage {
WebPChunk* header_; // Corresponds to WEBP_CHUNK_FRAME/WEBP_CHUNK_TILE.
WebPChunk* header_; // Corresponds to WEBP_CHUNK_ANMF/WEBP_CHUNK_FRGM.
WebPChunk* alpha_; // Corresponds to WEBP_CHUNK_ALPHA.
WebPChunk* img_; // Corresponds to WEBP_CHUNK_IMAGE.
int is_partial_; // True if only some of the chunks are filled.
@ -51,8 +54,9 @@ struct WebPMuxImage {
struct WebPMux {
WebPMuxImage* images_;
WebPChunk* iccp_;
WebPChunk* meta_;
WebPChunk* loop_;
WebPChunk* exif_;
WebPChunk* xmp_;
WebPChunk* anim_;
WebPChunk* vp8x_;
WebPChunk* unknown_;
@ -65,13 +69,14 @@ struct WebPMux {
typedef enum {
IDX_VP8X = 0,
IDX_ICCP,
IDX_LOOP,
IDX_FRAME,
IDX_TILE,
IDX_ANIM,
IDX_ANMF,
IDX_FRGM,
IDX_ALPHA,
IDX_VP8,
IDX_VP8L,
IDX_META,
IDX_EXIF,
IDX_XMP,
IDX_UNKNOWN,
IDX_NIL,
@ -80,8 +85,6 @@ typedef enum {
#define NIL_TAG 0x00000000u // To signal void chunk.
#define MKFOURCC(a, b, c, d) ((uint32_t)(a) | (b) << 8 | (c) << 16 | (d) << 24)
typedef struct {
uint32_t tag;
WebPChunkId id;
@ -90,44 +93,6 @@ typedef struct {
extern const ChunkInfo kChunks[IDX_LAST_CHUNK];
//------------------------------------------------------------------------------
// Helper functions.
// Read 16, 24 or 32 bits stored in little-endian order.
// Reads the two bytes at 'data' as an unsigned 16-bit little-endian value
// (data[0] is the least significant byte) and returns it as an int.
static WEBP_INLINE int GetLE16(const uint8_t* const data) {
  const int low_byte = (int)(data[0] << 0);
  const int high_byte = data[1];
  return low_byte | (high_byte << 8);
}
// Reads the three bytes at 'data' as an unsigned 24-bit little-endian value:
// the low 16 bits come from GetLE16(), data[2] supplies bits 16..23.
static WEBP_INLINE int GetLE24(const uint8_t* const data) {
  const int top_byte = data[2];
  return (top_byte << 16) | GetLE16(data);
}
// Reads the four bytes at 'data' as an unsigned 32-bit little-endian value
// (data[0] is the least significant byte).
// Every byte is widened to uint32_t before shifting: shifting a signed int
// holding a value >= 0x8000 left by 16 would overflow into the sign bit,
// which is undefined behaviour.
static WEBP_INLINE uint32_t GetLE32(const uint8_t* const data) {
  return ((uint32_t)data[0] << 0) |
         ((uint32_t)data[1] << 8) |
         ((uint32_t)data[2] << 16) |
         ((uint32_t)data[3] << 24);
}
// Store 16, 24 or 32 bits in little-endian order.
// Stores the low 16 bits of 'val' at 'data' in little-endian order
// (data[0] receives bits 0..7, data[1] receives bits 8..15).
// Debug builds assert that 'val' is below 1 << 16.
static WEBP_INLINE void PutLE16(uint8_t* const data, int val) {
  const uint8_t low_byte = (uint8_t)(val >> 0);
  const uint8_t high_byte = (uint8_t)(val >> 8);
  assert(val < (1 << 16));
  data[0] = low_byte;
  data[1] = high_byte;
}
// Stores the low 24 bits of 'val' at 'data' in little-endian order.
// Bits 0..15 are written through PutLE16(); data[2] receives bits 16..23.
// Debug builds assert that 'val' is below 1 << 24.
static WEBP_INLINE void PutLE24(uint8_t* const data, int val) {
  const int low_half = val & 0xffff;
  assert(val < (1 << 24));
  data[2] = (val >> 16);
  PutLE16(data, low_half);
}
// Stores 'val' at 'data' as four little-endian bytes by writing its low and
// high 16-bit halves with PutLE16().
static WEBP_INLINE void PutLE32(uint8_t* const data, uint32_t val) {
  const int low_half = (int)(val & 0xffff);
  const int high_half = (int)(val >> 16);
  PutLE16(data, low_half);
  PutLE16(data + 2, high_half);
}
// Returns the on-disk size of a chunk whose payload is 'chunk_size' bytes:
// the chunk header plus the payload rounded up to an even number of bytes.
// The rounding mask is built in size_t: '~1U' is only as wide as unsigned int
// and would clear the upper bits of a wider size_t.
static WEBP_INLINE size_t SizeWithPadding(size_t chunk_size) {
  const size_t padded_size = (chunk_size + 1) & ~(size_t)1;
  return CHUNK_HEADER_SIZE + padded_size;
}
//------------------------------------------------------------------------------
// Chunk object management.
@ -140,6 +105,12 @@ CHUNK_INDEX ChunkGetIndexFromTag(uint32_t tag);
// Get chunk id from chunk tag. Returns WEBP_CHUNK_NIL if not found.
WebPChunkId ChunkGetIdFromTag(uint32_t tag);
// Convert a fourcc string to a tag.
uint32_t ChunkGetTagFromFourCC(const char fourcc[4]);
// Get chunk index from fourcc. Returns IDX_UNKNOWN if given fourcc is unknown.
CHUNK_INDEX ChunkGetIndexFromFourCC(const char fourcc[4]);
// Search for nth chunk with given 'tag' in the chunk list.
// nth = 0 means "last of the list".
WebPChunk* ChunkSearchList(WebPChunk* first, uint32_t nth, uint32_t tag);
@ -150,7 +121,8 @@ WebPMuxError ChunkAssignData(WebPChunk* chunk, const WebPData* const data,
// Sets 'chunk' at nth position in the 'chunk_list'.
// nth = 0 has the special meaning "last of the list".
WebPMuxError ChunkSetNth(const WebPChunk* chunk, WebPChunk** chunk_list,
// On success ownership is transferred from 'chunk' to the 'chunk_list'.
WebPMuxError ChunkSetNth(WebPChunk* chunk, WebPChunk** chunk_list,
uint32_t nth);
// Releases chunk and returns chunk->next_.
@ -159,9 +131,14 @@ WebPChunk* ChunkRelease(WebPChunk* const chunk);
// Deletes given chunk & returns chunk->next_.
WebPChunk* ChunkDelete(WebPChunk* const chunk);
// Returns size of the chunk including chunk header and padding byte (if any).
// 'chunk_size' is the payload size; it is rounded up to an even byte count.
// The rounding mask is built in size_t: '~1U' is only as wide as unsigned int
// and would clear the upper bits of a wider size_t.
static WEBP_INLINE size_t SizeWithPadding(size_t chunk_size) {
  const size_t padded_size = (chunk_size + 1) & ~(size_t)1;
  return CHUNK_HEADER_SIZE + padded_size;
}
// Size of a chunk including header and padding.
static WEBP_INLINE size_t ChunkDiskSize(const WebPChunk* chunk) {
const size_t data_size = chunk->data_.size_;
const size_t data_size = chunk->data_.size;
assert(data_size < MAX_CHUNK_PAYLOAD);
return SizeWithPadding(data_size);
}
@ -193,13 +170,14 @@ WebPMuxImage* MuxImageDelete(WebPMuxImage* const wpi);
void MuxImageDeleteAll(WebPMuxImage** const wpi_list);
// Count number of images matching the given tag id in the 'wpi_list'.
// If id == WEBP_CHUNK_NIL, all images will be matched.
int MuxImageCount(const WebPMuxImage* wpi_list, WebPChunkId id);
// Check if given ID corresponds to an image related chunk.
static WEBP_INLINE int IsWPI(WebPChunkId id) {
switch (id) {
case WEBP_CHUNK_FRAME:
case WEBP_CHUNK_TILE:
case WEBP_CHUNK_ANMF:
case WEBP_CHUNK_FRGM:
case WEBP_CHUNK_ALPHA:
case WEBP_CHUNK_IMAGE: return 1;
default: return 0;
@ -211,8 +189,8 @@ static WEBP_INLINE WebPChunk** MuxImageGetListFromId(
const WebPMuxImage* const wpi, WebPChunkId id) {
assert(wpi != NULL);
switch (id) {
case WEBP_CHUNK_FRAME:
case WEBP_CHUNK_TILE: return (WebPChunk**)&wpi->header_;
case WEBP_CHUNK_ANMF:
case WEBP_CHUNK_FRGM: return (WebPChunk**)&wpi->header_;
case WEBP_CHUNK_ALPHA: return (WebPChunk**)&wpi->alpha_;
case WEBP_CHUNK_IMAGE: return (WebPChunk**)&wpi->img_;
default: return NULL;
@ -222,13 +200,12 @@ static WEBP_INLINE WebPChunk** MuxImageGetListFromId(
// Pushes 'wpi' at the end of 'wpi_list'.
WebPMuxError MuxImagePush(const WebPMuxImage* wpi, WebPMuxImage** wpi_list);
// Delete nth image in the image list with given tag id.
WebPMuxError MuxImageDeleteNth(WebPMuxImage** wpi_list, uint32_t nth,
WebPChunkId id);
// Delete nth image in the image list.
WebPMuxError MuxImageDeleteNth(WebPMuxImage** wpi_list, uint32_t nth);
// Get nth image in the image list with given tag id.
// Get nth image in the image list.
WebPMuxError MuxImageGetNth(const WebPMuxImage** wpi_list, uint32_t nth,
WebPChunkId id, WebPMuxImage** wpi);
WebPMuxImage** wpi);
// Total size of the given image.
size_t MuxImageDiskSize(const WebPMuxImage* const wpi);

@ -12,6 +12,7 @@
#include <assert.h>
#include "./muxi.h"
#include "../utils/utils.h"
#if defined(__cplusplus) || defined(c_plusplus)
extern "C" {
@ -22,18 +23,25 @@ extern "C" {
const ChunkInfo kChunks[] = {
{ MKFOURCC('V', 'P', '8', 'X'), WEBP_CHUNK_VP8X, VP8X_CHUNK_SIZE },
{ MKFOURCC('I', 'C', 'C', 'P'), WEBP_CHUNK_ICCP, UNDEFINED_CHUNK_SIZE },
{ MKFOURCC('L', 'O', 'O', 'P'), WEBP_CHUNK_LOOP, LOOP_CHUNK_SIZE },
{ MKFOURCC('F', 'R', 'M', ' '), WEBP_CHUNK_FRAME, FRAME_CHUNK_SIZE },
{ MKFOURCC('T', 'I', 'L', 'E'), WEBP_CHUNK_TILE, TILE_CHUNK_SIZE },
{ MKFOURCC('A', 'N', 'I', 'M'), WEBP_CHUNK_ANIM, ANIM_CHUNK_SIZE },
{ MKFOURCC('A', 'N', 'M', 'F'), WEBP_CHUNK_ANMF, ANMF_CHUNK_SIZE },
{ MKFOURCC('F', 'R', 'G', 'M'), WEBP_CHUNK_FRGM, FRGM_CHUNK_SIZE },
{ MKFOURCC('A', 'L', 'P', 'H'), WEBP_CHUNK_ALPHA, UNDEFINED_CHUNK_SIZE },
{ MKFOURCC('V', 'P', '8', ' '), WEBP_CHUNK_IMAGE, UNDEFINED_CHUNK_SIZE },
{ MKFOURCC('V', 'P', '8', 'L'), WEBP_CHUNK_IMAGE, UNDEFINED_CHUNK_SIZE },
{ MKFOURCC('M', 'E', 'T', 'A'), WEBP_CHUNK_META, UNDEFINED_CHUNK_SIZE },
{ MKFOURCC('E', 'X', 'I', 'F'), WEBP_CHUNK_EXIF, UNDEFINED_CHUNK_SIZE },
{ MKFOURCC('X', 'M', 'P', ' '), WEBP_CHUNK_XMP, UNDEFINED_CHUNK_SIZE },
{ MKFOURCC('U', 'N', 'K', 'N'), WEBP_CHUNK_UNKNOWN, UNDEFINED_CHUNK_SIZE },
{ NIL_TAG, WEBP_CHUNK_NIL, UNDEFINED_CHUNK_SIZE }
{ NIL_TAG, WEBP_CHUNK_NIL, UNDEFINED_CHUNK_SIZE }
};
//------------------------------------------------------------------------------
// Returns the mux library version packed into one int:
// major in bits 16 and up, minor in bits 8..15, revision in bits 0..7.
int WebPGetMuxVersion(void) {
  const int major_bits = MUX_MAJ_VERSION << 16;
  const int minor_bits = MUX_MIN_VERSION << 8;
  const int revision_bits = MUX_REV_VERSION;
  return major_bits | minor_bits | revision_bits;
}
//------------------------------------------------------------------------------
// Life of a chunk object.
@ -73,12 +81,22 @@ WebPChunkId ChunkGetIdFromTag(uint32_t tag) {
return WEBP_CHUNK_NIL;
}
// Converts a four-character code into a chunk tag, with fourcc[0] in the
// least significant byte (the same layout MKFOURCC() produces).
// The characters are read as unsigned bytes and widened to uint32_t before
// shifting, so a byte >= 0x80 can neither sign-extend into the other bytes
// nor overflow a signed shift. For ASCII codes the result is unchanged.
uint32_t ChunkGetTagFromFourCC(const char fourcc[4]) {
  const uint8_t* const bytes = (const uint8_t*)fourcc;
  return ((uint32_t)bytes[0] << 0) |
         ((uint32_t)bytes[1] << 8) |
         ((uint32_t)bytes[2] << 16) |
         ((uint32_t)bytes[3] << 24);
}
// Maps a four-character code to its chunk index. The code is converted to a
// tag and looked up with ChunkGetIndexFromTag(); a lookup result of IDX_NIL
// is reported as IDX_UNKNOWN.
CHUNK_INDEX ChunkGetIndexFromFourCC(const char fourcc[4]) {
  const CHUNK_INDEX found =
      ChunkGetIndexFromTag(ChunkGetTagFromFourCC(fourcc));
  if (found == IDX_NIL) return IDX_UNKNOWN;
  return found;
}
//------------------------------------------------------------------------------
// Chunk search methods.
// Returns next chunk in the chunk list with the given tag.
static WebPChunk* ChunkSearchNextInList(WebPChunk* chunk, uint32_t tag) {
while (chunk && chunk->tag_ != tag) {
while (chunk != NULL && chunk->tag_ != tag) {
chunk = chunk->next_;
}
return chunk;
@ -87,7 +105,7 @@ static WebPChunk* ChunkSearchNextInList(WebPChunk* chunk, uint32_t tag) {
WebPChunk* ChunkSearchList(WebPChunk* first, uint32_t nth, uint32_t tag) {
uint32_t iter = nth;
first = ChunkSearchNextInList(first, tag);
if (!first) return NULL;
if (first == NULL) return NULL;
while (--iter != 0) {
WebPChunk* next_chunk = ChunkSearchNextInList(first->next_, tag);
@ -99,14 +117,14 @@ WebPChunk* ChunkSearchList(WebPChunk* first, uint32_t nth, uint32_t tag) {
// Outputs a pointer to 'prev_chunk->next_',
// where 'prev_chunk' is the pointer to the chunk at position (nth - 1).
// Returns 1 if nth chunk was found, 0 otherwise.
// Returns true if nth chunk was found.
static int ChunkSearchListToSet(WebPChunk** chunk_list, uint32_t nth,
WebPChunk*** const location) {
uint32_t count = 0;
assert(chunk_list);
assert(chunk_list != NULL);
*location = chunk_list;
while (*chunk_list) {
while (*chunk_list != NULL) {
WebPChunk* const cur_chunk = *chunk_list;
++count;
if (count == nth) return 1; // Found.
@ -124,34 +142,25 @@ static int ChunkSearchListToSet(WebPChunk** chunk_list, uint32_t nth,
WebPMuxError ChunkAssignData(WebPChunk* chunk, const WebPData* const data,
int copy_data, uint32_t tag) {
// For internally allocated chunks, always copy data & make it owner of data.
if (tag == kChunks[IDX_VP8X].tag || tag == kChunks[IDX_LOOP].tag) {
if (tag == kChunks[IDX_VP8X].tag || tag == kChunks[IDX_ANIM].tag) {
copy_data = 1;
}
ChunkRelease(chunk);
if (data != NULL) {
if (copy_data) {
// Copy data.
chunk->data_.bytes_ = (uint8_t*)malloc(data->size_);
if (chunk->data_.bytes_ == NULL) return WEBP_MUX_MEMORY_ERROR;
memcpy((uint8_t*)chunk->data_.bytes_, data->bytes_, data->size_);
chunk->data_.size_ = data->size_;
// Chunk is owner of data.
chunk->owner_ = 1;
} else {
// Don't copy data.
if (copy_data) { // Copy data.
if (!WebPDataCopy(data, &chunk->data_)) return WEBP_MUX_MEMORY_ERROR;
chunk->owner_ = 1; // Chunk is owner of data.
} else { // Don't copy data.
chunk->data_ = *data;
}
}
chunk->tag_ = tag;
return WEBP_MUX_OK;
}
WebPMuxError ChunkSetNth(const WebPChunk* chunk, WebPChunk** chunk_list,
WebPMuxError ChunkSetNth(WebPChunk* chunk, WebPChunk** chunk_list,
uint32_t nth) {
WebPChunk* new_chunk;
@ -162,6 +171,7 @@ WebPMuxError ChunkSetNth(const WebPChunk* chunk, WebPChunk** chunk_list,
new_chunk = (WebPChunk*)malloc(sizeof(*new_chunk));
if (new_chunk == NULL) return WEBP_MUX_MEMORY_ERROR;
*new_chunk = *chunk;
chunk->owner_ = 0;
new_chunk->next_ = *chunk_list;
*chunk_list = new_chunk;
return WEBP_MUX_OK;
@ -181,7 +191,7 @@ WebPChunk* ChunkDelete(WebPChunk* const chunk) {
size_t ChunksListDiskSize(const WebPChunk* chunk_list) {
size_t size = 0;
while (chunk_list) {
while (chunk_list != NULL) {
size += ChunkDiskSize(chunk_list);
chunk_list = chunk_list->next_;
}
@ -189,55 +199,26 @@ size_t ChunksListDiskSize(const WebPChunk* chunk_list) {
}
static uint8_t* ChunkEmit(const WebPChunk* const chunk, uint8_t* dst) {
const size_t chunk_size = chunk->data_.size_;
const size_t chunk_size = chunk->data_.size;
assert(chunk);
assert(chunk->tag_ != NIL_TAG);
PutLE32(dst + 0, chunk->tag_);
PutLE32(dst + TAG_SIZE, (uint32_t)chunk_size);
assert(chunk_size == (uint32_t)chunk_size);
memcpy(dst + CHUNK_HEADER_SIZE, chunk->data_.bytes_, chunk_size);
memcpy(dst + CHUNK_HEADER_SIZE, chunk->data_.bytes, chunk_size);
if (chunk_size & 1)
dst[CHUNK_HEADER_SIZE + chunk_size] = 0; // Add padding.
return dst + ChunkDiskSize(chunk);
}
uint8_t* ChunkListEmit(const WebPChunk* chunk_list, uint8_t* dst) {
while (chunk_list) {
while (chunk_list != NULL) {
dst = ChunkEmit(chunk_list, dst);
chunk_list = chunk_list->next_;
}
return dst;
}
//------------------------------------------------------------------------------
// Manipulation of a WebPData object.
// Zero-fills '*webp_data'. A NULL pointer is ignored.
void WebPDataInit(WebPData* webp_data) {
  if (webp_data == NULL) return;
  memset(webp_data, 0, sizeof(*webp_data));
}
// Frees the buffer referenced by 'webp_data->bytes_' and then resets the
// object with WebPDataInit(). A NULL pointer is ignored.
void WebPDataClear(WebPData* webp_data) {
  if (webp_data == NULL) return;
  free((void*)webp_data->bytes_);
  WebPDataInit(webp_data);
}
// Makes 'dst' a freshly malloc'ed copy of 'src'.
// Returns 0 if either pointer is NULL or the allocation fails, 1 otherwise.
// 'dst' is reset with WebPDataInit() first, so it stays empty both on
// allocation failure and when 'src' has no bytes to copy.
int WebPDataCopy(const WebPData* src, WebPData* dst) {
  uint8_t* copy;
  if (src == NULL || dst == NULL) return 0;

  WebPDataInit(dst);
  if (src->bytes_ == NULL || src->size_ == 0) return 1;  // Nothing to copy.

  copy = (uint8_t*)malloc(src->size_);
  if (copy == NULL) return 0;
  memcpy((void*)copy, src->bytes_, src->size_);
  dst->bytes_ = copy;
  dst->size_ = src->size_;
  return 1;
}
//------------------------------------------------------------------------------
// Life of a MuxImage object.
@ -265,10 +246,14 @@ int MuxImageCount(const WebPMuxImage* wpi_list, WebPChunkId id) {
int count = 0;
const WebPMuxImage* current;
for (current = wpi_list; current != NULL; current = current->next_) {
const WebPChunk* const wpi_chunk = *MuxImageGetListFromId(current, id);
if (wpi_chunk != NULL) {
const WebPChunkId wpi_chunk_id = ChunkGetIdFromTag(wpi_chunk->tag_);
if (wpi_chunk_id == id) ++count;
if (id == WEBP_CHUNK_NIL) {
++count; // Special case: count all images.
} else {
const WebPChunk* const wpi_chunk = *MuxImageGetListFromId(current, id);
if (wpi_chunk != NULL) {
const WebPChunkId wpi_chunk_id = ChunkGetIdFromTag(wpi_chunk->tag_);
if (wpi_chunk_id == id) ++count; // Count images with a matching 'id'.
}
}
}
return count;
@ -276,34 +261,22 @@ int MuxImageCount(const WebPMuxImage* wpi_list, WebPChunkId id) {
// Outputs a pointer to 'prev_wpi->next_',
// where 'prev_wpi' is the pointer to the image at position (nth - 1).
// Returns 1 if nth image with given id was found, 0 otherwise.
// Returns true if nth image was found.
static int SearchImageToGetOrDelete(WebPMuxImage** wpi_list, uint32_t nth,
WebPChunkId id,
WebPMuxImage*** const location) {
uint32_t count = 0;
assert(wpi_list);
*location = wpi_list;
// Search makes sense only for the following.
assert(id == WEBP_CHUNK_FRAME || id == WEBP_CHUNK_TILE ||
id == WEBP_CHUNK_IMAGE);
assert(id != WEBP_CHUNK_IMAGE || nth == 1);
if (nth == 0) {
nth = MuxImageCount(*wpi_list, id);
nth = MuxImageCount(*wpi_list, WEBP_CHUNK_NIL);
if (nth == 0) return 0; // Not found.
}
while (*wpi_list) {
while (*wpi_list != NULL) {
WebPMuxImage* const cur_wpi = *wpi_list;
const WebPChunk* const wpi_chunk = *MuxImageGetListFromId(cur_wpi, id);
if (wpi_chunk != NULL) {
const WebPChunkId wpi_chunk_id = ChunkGetIdFromTag(wpi_chunk->tag_);
if (wpi_chunk_id == id) {
++count;
if (count == nth) return 1; // Found.
}
}
++count;
if (count == nth) return 1; // Found.
wpi_list = &cur_wpi->next_;
*location = wpi_list;
}
@ -346,15 +319,14 @@ WebPMuxImage* MuxImageDelete(WebPMuxImage* const wpi) {
}
void MuxImageDeleteAll(WebPMuxImage** const wpi_list) {
while (*wpi_list) {
while (*wpi_list != NULL) {
*wpi_list = MuxImageDelete(*wpi_list);
}
}
WebPMuxError MuxImageDeleteNth(WebPMuxImage** wpi_list, uint32_t nth,
WebPChunkId id) {
WebPMuxError MuxImageDeleteNth(WebPMuxImage** wpi_list, uint32_t nth) {
assert(wpi_list);
if (!SearchImageToGetOrDelete(wpi_list, nth, id, &wpi_list)) {
if (!SearchImageToGetOrDelete(wpi_list, nth, &wpi_list)) {
return WEBP_MUX_NOT_FOUND;
}
*wpi_list = MuxImageDelete(*wpi_list);
@ -365,10 +337,10 @@ WebPMuxError MuxImageDeleteNth(WebPMuxImage** wpi_list, uint32_t nth,
// MuxImage reader methods.
WebPMuxError MuxImageGetNth(const WebPMuxImage** wpi_list, uint32_t nth,
WebPChunkId id, WebPMuxImage** wpi) {
WebPMuxImage** wpi) {
assert(wpi_list);
assert(wpi);
if (!SearchImageToGetOrDelete((WebPMuxImage**)wpi_list, nth, id,
if (!SearchImageToGetOrDelete((WebPMuxImage**)wpi_list, nth,
(WebPMuxImage***)&wpi_list)) {
return WEBP_MUX_NOT_FOUND;
}
@ -390,27 +362,46 @@ size_t MuxImageDiskSize(const WebPMuxImage* const wpi) {
size_t MuxImageListDiskSize(const WebPMuxImage* wpi_list) {
size_t size = 0;
while (wpi_list) {
while (wpi_list != NULL) {
size += MuxImageDiskSize(wpi_list);
wpi_list = wpi_list->next_;
}
return size;
}
// Special case as ANMF/FRGM chunk encapsulates other image chunks.
// Writes an ANMF/FRGM 'header' chunk at 'dst' and returns the position just
// past it (dst + ChunkDiskSize(header)).
// Unlike a regular chunk, the size field written after the tag is
// 'total_size' - CHUNK_HEADER_SIZE rather than the header's own payload size.
// A zero padding byte follows the payload when its size is odd.
static uint8_t* ChunkEmitSpecial(const WebPChunk* const header,
                                 size_t total_size, uint8_t* dst) {
  const size_t payload_size = header->data_.size;
  uint8_t* const payload = dst + CHUNK_HEADER_SIZE;
  assert(header->tag_ == kChunks[IDX_ANMF].tag ||
         header->tag_ == kChunks[IDX_FRGM].tag);

  // Chunk header: tag followed by the encapsulating size.
  PutLE32(dst + 0, header->tag_);
  PutLE32(dst + TAG_SIZE, (uint32_t)(total_size - CHUNK_HEADER_SIZE));

  // Payload, plus padding to an even length.
  assert(payload_size == (uint32_t)payload_size);
  memcpy(payload, header->data_.bytes, payload_size);
  if ((payload_size & 1) != 0) {
    payload[payload_size] = 0;  // Add padding.
  }
  return dst + ChunkDiskSize(header);
}
uint8_t* MuxImageEmit(const WebPMuxImage* const wpi, uint8_t* dst) {
// Ordering of chunks to be emitted is strictly as follows:
// 1. Frame/Tile chunk (if present).
// 2. Alpha chunk (if present).
// 1. ANMF/FRGM chunk (if present).
// 2. ALPH chunk (if present).
// 3. VP8/VP8L chunk.
assert(wpi);
if (wpi->header_ != NULL) dst = ChunkEmit(wpi->header_, dst);
if (wpi->header_ != NULL) {
dst = ChunkEmitSpecial(wpi->header_, MuxImageDiskSize(wpi), dst);
}
if (wpi->alpha_ != NULL) dst = ChunkEmit(wpi->alpha_, dst);
if (wpi->img_ != NULL) dst = ChunkEmit(wpi->img_, dst);
return dst;
}
uint8_t* MuxImageListEmit(const WebPMuxImage* wpi_list, uint8_t* dst) {
while (wpi_list) {
while (wpi_list != NULL) {
dst = MuxImageEmit(wpi_list, dst);
wpi_list = wpi_list->next_;
}
@ -441,11 +432,12 @@ uint8_t* MuxEmitRiffHeader(uint8_t* const data, size_t size) {
WebPChunk** MuxGetChunkListFromId(const WebPMux* mux, WebPChunkId id) {
assert(mux != NULL);
switch(id) {
switch (id) {
case WEBP_CHUNK_VP8X: return (WebPChunk**)&mux->vp8x_;
case WEBP_CHUNK_ICCP: return (WebPChunk**)&mux->iccp_;
case WEBP_CHUNK_LOOP: return (WebPChunk**)&mux->loop_;
case WEBP_CHUNK_META: return (WebPChunk**)&mux->meta_;
case WEBP_CHUNK_ANIM: return (WebPChunk**)&mux->anim_;
case WEBP_CHUNK_EXIF: return (WebPChunk**)&mux->exif_;
case WEBP_CHUNK_XMP: return (WebPChunk**)&mux->xmp_;
case WEBP_CHUNK_UNKNOWN: return (WebPChunk**)&mux->unknown_;
default: return NULL;
}
@ -453,17 +445,17 @@ WebPChunk** MuxGetChunkListFromId(const WebPMux* mux, WebPChunkId id) {
WebPMuxError MuxValidateForImage(const WebPMux* const mux) {
const int num_images = MuxImageCount(mux->images_, WEBP_CHUNK_IMAGE);
const int num_frames = MuxImageCount(mux->images_, WEBP_CHUNK_FRAME);
const int num_tiles = MuxImageCount(mux->images_, WEBP_CHUNK_TILE);
const int num_frames = MuxImageCount(mux->images_, WEBP_CHUNK_ANMF);
const int num_fragments = MuxImageCount(mux->images_, WEBP_CHUNK_FRGM);
if (num_images == 0) {
// No images in mux.
return WEBP_MUX_NOT_FOUND;
} else if (num_images == 1 && num_frames == 0 && num_tiles == 0) {
} else if (num_images == 1 && num_frames == 0 && num_fragments == 0) {
// Valid case (single image).
return WEBP_MUX_OK;
} else {
// Frame/Tile case OR an invalid mux.
// Frame/Fragment case OR an invalid mux.
return WEBP_MUX_INVALID_ARGUMENT;
}
}
@ -494,10 +486,11 @@ static WebPMuxError ValidateChunk(const WebPMux* const mux, CHUNK_INDEX idx,
WebPMuxError MuxValidate(const WebPMux* const mux) {
int num_iccp;
int num_meta;
int num_loop_chunks;
int num_exif;
int num_xmp;
int num_anim;
int num_frames;
int num_tiles;
int num_fragments;
int num_vp8x;
int num_images;
int num_alpha;
@ -517,29 +510,33 @@ WebPMuxError MuxValidate(const WebPMux* const mux) {
err = ValidateChunk(mux, IDX_ICCP, ICCP_FLAG, flags, 1, &num_iccp);
if (err != WEBP_MUX_OK) return err;
// At most one EXIF metadata.
err = ValidateChunk(mux, IDX_EXIF, EXIF_FLAG, flags, 1, &num_exif);
if (err != WEBP_MUX_OK) return err;
// At most one XMP metadata.
err = ValidateChunk(mux, IDX_META, META_FLAG, flags, 1, &num_meta);
err = ValidateChunk(mux, IDX_XMP, XMP_FLAG, flags, 1, &num_xmp);
if (err != WEBP_MUX_OK) return err;
// Animation: ANIMATION_FLAG, loop chunk and frame chunk(s) are consistent.
// At most one loop chunk.
err = ValidateChunk(mux, IDX_LOOP, NO_FLAG, flags, 1, &num_loop_chunks);
// Animation: ANIMATION_FLAG, ANIM chunk and ANMF chunk(s) are consistent.
// At most one ANIM chunk.
err = ValidateChunk(mux, IDX_ANIM, NO_FLAG, flags, 1, &num_anim);
if (err != WEBP_MUX_OK) return err;
err = ValidateChunk(mux, IDX_FRAME, NO_FLAG, flags, -1, &num_frames);
err = ValidateChunk(mux, IDX_ANMF, NO_FLAG, flags, -1, &num_frames);
if (err != WEBP_MUX_OK) return err;
{
const int has_animation = !!(flags & ANIMATION_FLAG);
if (has_animation && (num_loop_chunks == 0 || num_frames == 0)) {
if (has_animation && (num_anim == 0 || num_frames == 0)) {
return WEBP_MUX_INVALID_ARGUMENT;
}
if (!has_animation && (num_loop_chunks == 1 || num_frames > 0)) {
if (!has_animation && (num_anim == 1 || num_frames > 0)) {
return WEBP_MUX_INVALID_ARGUMENT;
}
}
// Tiling: TILE_FLAG and tile chunk(s) are consistent.
err = ValidateChunk(mux, IDX_TILE, TILE_FLAG, flags, -1, &num_tiles);
// Fragmentation: FRAGMENTS_FLAG and FRGM chunk(s) are consistent.
err = ValidateChunk(mux, IDX_FRGM, FRAGMENTS_FLAG, flags, -1, &num_fragments);
if (err != WEBP_MUX_OK) return err;
// Verify either VP8X chunk is present OR there is only one elem in
@ -551,16 +548,18 @@ WebPMuxError MuxValidate(const WebPMux* const mux) {
if (num_vp8x == 0 && num_images != 1) return WEBP_MUX_INVALID_ARGUMENT;
// ALPHA_FLAG & alpha chunk(s) are consistent.
if (num_vp8x > 0 && MuxHasLosslessImages(mux->images_)) {
// Special case: we have a VP8X chunk as well as some lossless images.
if (!(flags & ALPHA_FLAG)) return WEBP_MUX_INVALID_ARGUMENT;
if (MuxHasLosslessImages(mux->images_)) {
if (num_vp8x > 0) {
// Special case: we have a VP8X chunk as well as some lossless images.
if (!(flags & ALPHA_FLAG)) return WEBP_MUX_INVALID_ARGUMENT;
}
} else {
err = ValidateChunk(mux, IDX_ALPHA, ALPHA_FLAG, flags, -1, &num_alpha);
if (err != WEBP_MUX_OK) return err;
err = ValidateChunk(mux, IDX_ALPHA, ALPHA_FLAG, flags, -1, &num_alpha);
if (err != WEBP_MUX_OK) return err;
}
// num_tiles & num_images are consistent.
if (num_tiles > 0 && num_images != num_tiles) {
// num_fragments & num_images are consistent.
if (num_fragments > 0 && num_images != num_fragments) {
return WEBP_MUX_INVALID_ARGUMENT;
}

@ -12,6 +12,7 @@
#include <assert.h>
#include "./muxi.h"
#include "../utils/utils.h"
#if defined(__cplusplus) || defined(c_plusplus)
extern "C" {
@ -41,8 +42,9 @@ static WebPMuxError MuxGet(const WebPMux* const mux, CHUNK_INDEX idx,
SWITCH_ID_LIST(IDX_VP8X, mux->vp8x_);
SWITCH_ID_LIST(IDX_ICCP, mux->iccp_);
SWITCH_ID_LIST(IDX_LOOP, mux->loop_);
SWITCH_ID_LIST(IDX_META, mux->meta_);
SWITCH_ID_LIST(IDX_ANIM, mux->anim_);
SWITCH_ID_LIST(IDX_EXIF, mux->exif_);
SWITCH_ID_LIST(IDX_XMP, mux->xmp_);
SWITCH_ID_LIST(IDX_UNKNOWN, mux->unknown_);
return WEBP_MUX_NOT_FOUND;
}
@ -50,10 +52,9 @@ static WebPMuxError MuxGet(const WebPMux* const mux, CHUNK_INDEX idx,
// Fill the chunk with the given data (includes chunk header bytes), after some
// verifications.
static WebPMuxError ChunkVerifyAndAssignData(WebPChunk* chunk,
const uint8_t* data,
size_t data_size, size_t riff_size,
int copy_data) {
static WebPMuxError ChunkVerifyAndAssign(WebPChunk* chunk,
const uint8_t* data, size_t data_size,
size_t riff_size, int copy_data) {
uint32_t chunk_size;
WebPData chunk_data;
@ -68,11 +69,74 @@ static WebPMuxError ChunkVerifyAndAssignData(WebPChunk* chunk,
}
// Data assignment.
chunk_data.bytes_ = data + CHUNK_HEADER_SIZE;
chunk_data.size_ = chunk_size;
chunk_data.bytes = data + CHUNK_HEADER_SIZE;
chunk_data.size = chunk_size;
return ChunkAssignData(chunk, &chunk_data, copy_data, GetLE32(data + 0));
}
// Splits the payload of an ANMF/FRGM 'chunk' into the pieces of 'wpi':
// the fixed-size frame/fragment header goes to wpi->header_, and each
// sub-chunk that follows goes to wpi->alpha_ (ALPH) or wpi->img_ (VP8/VP8L).
// 'copy_data' is forwarded to the chunk assignment helpers.
// Returns 1 on success. Returns 0 if the payload is shorter than the header,
// a sub-chunk fails verification, an unexpected sub-chunk type or a second
// ALPH chunk is found, a chunk cannot be stored, or the image is still
// partial once the payload is exhausted.
static int MuxImageParse(const WebPChunk* const chunk, int copy_data,
                         WebPMuxImage* const wpi) {
  const uint8_t* bytes = chunk->data_.bytes;
  size_t size = chunk->data_.size;
  const uint8_t* const last = bytes + size;
  WebPChunk subchunk;
  size_t subchunk_size;
  ChunkInit(&subchunk);

  assert(chunk->tag_ == kChunks[IDX_ANMF].tag ||
         chunk->tag_ == kChunks[IDX_FRGM].tag);
  assert(!wpi->is_partial_);

  // ANMF/FRGM.
  {
    const size_t hdr_size = (chunk->tag_ == kChunks[IDX_ANMF].tag) ?
        ANMF_CHUNK_SIZE : FRGM_CHUNK_SIZE;
    const WebPData temp = { bytes, hdr_size };
    // Each of ANMF and FRGM chunk contain a header at the beginning. So, its
    // size should at least be 'hdr_size'.
    if (size < hdr_size) goto Fail;
    // Do not ignore a failure here: it would leave an empty header chunk
    // in 'wpi' while parsing carries on.
    if (ChunkAssignData(&subchunk, &temp, copy_data,
                        chunk->tag_) != WEBP_MUX_OK) {
      goto Fail;
    }
  }
  if (ChunkSetNth(&subchunk, &wpi->header_, 1) != WEBP_MUX_OK) goto Fail;
  wpi->is_partial_ = 1;  // Waiting for ALPH and/or VP8/VP8L chunks.

  // Rest of the chunks.
  // The header has no chunk header of its own inside the payload, so only its
  // (padded) data size is skipped.
  subchunk_size = ChunkDiskSize(&subchunk) - CHUNK_HEADER_SIZE;
  bytes += subchunk_size;
  size -= subchunk_size;

  while (bytes != last) {
    ChunkInit(&subchunk);
    if (ChunkVerifyAndAssign(&subchunk, bytes, size, size,
                             copy_data) != WEBP_MUX_OK) {
      goto Fail;
    }
    switch (ChunkGetIdFromTag(subchunk.tag_)) {
      case WEBP_CHUNK_ALPHA:
        if (wpi->alpha_ != NULL) goto Fail;  // Consecutive ALPH chunks.
        if (ChunkSetNth(&subchunk, &wpi->alpha_, 1) != WEBP_MUX_OK) goto Fail;
        wpi->is_partial_ = 1;  // Waiting for a VP8 chunk.
        break;
      case WEBP_CHUNK_IMAGE:
        if (ChunkSetNth(&subchunk, &wpi->img_, 1) != WEBP_MUX_OK) goto Fail;
        wpi->is_partial_ = 0;  // wpi is completely filled.
        break;
      default:
        goto Fail;
        break;
    }
    subchunk_size = ChunkDiskSize(&subchunk);
    bytes += subchunk_size;
    size -= subchunk_size;
  }
  if (wpi->is_partial_) goto Fail;
  return 1;

 Fail:
  ChunkRelease(&subchunk);
  return 0;
}
//------------------------------------------------------------------------------
// Create a mux object from WebP-RIFF data.
@ -94,8 +158,8 @@ WebPMux* WebPMuxCreateInternal(const WebPData* bitstream, int copy_data,
}
if (bitstream == NULL) return NULL;
data = bitstream->bytes_;
size = bitstream->size_;
data = bitstream->bytes;
size = bitstream->size;
if (data == NULL) return NULL;
if (size < RIFF_HEADER_SIZE) return NULL;
@ -135,42 +199,48 @@ WebPMux* WebPMuxCreateInternal(const WebPData* bitstream, int copy_data,
// Loop over chunks.
while (data != end) {
size_t data_size;
WebPChunkId id;
WebPMuxError err;
err = ChunkVerifyAndAssignData(&chunk, data, size, riff_size, copy_data);
if (err != WEBP_MUX_OK) goto Err;
WebPChunk** chunk_list;
if (ChunkVerifyAndAssign(&chunk, data, size, riff_size,
copy_data) != WEBP_MUX_OK) {
goto Err;
}
data_size = ChunkDiskSize(&chunk);
id = ChunkGetIdFromTag(chunk.tag_);
if (IsWPI(id)) { // An image chunk (frame/tile/alpha/vp8).
WebPChunk** wpi_chunk_ptr =
MuxImageGetListFromId(wpi, id); // Image chunk to set.
assert(wpi_chunk_ptr != NULL);
if (*wpi_chunk_ptr != NULL) goto Err; // Consecutive alpha chunks or
// consecutive frame/tile chunks.
if (ChunkSetNth(&chunk, wpi_chunk_ptr, 1) != WEBP_MUX_OK) goto Err;
if (id == WEBP_CHUNK_IMAGE) {
switch (id) {
case WEBP_CHUNK_ALPHA:
if (wpi->alpha_ != NULL) goto Err; // Consecutive ALPH chunks.
if (ChunkSetNth(&chunk, &wpi->alpha_, 1) != WEBP_MUX_OK) goto Err;
wpi->is_partial_ = 1; // Waiting for a VP8 chunk.
break;
case WEBP_CHUNK_IMAGE:
if (ChunkSetNth(&chunk, &wpi->img_, 1) != WEBP_MUX_OK) goto Err;
wpi->is_partial_ = 0; // wpi is completely filled.
PushImage:
// Add this to mux->images_ list.
if (MuxImagePush(wpi, &mux->images_) != WEBP_MUX_OK) goto Err;
MuxImageInit(wpi); // Reset for reading next image.
} else {
wpi->is_partial_ = 1; // wpi is only partially filled.
}
} else { // A non-image chunk.
WebPChunk** chunk_list;
if (wpi->is_partial_) goto Err; // Encountered a non-image chunk before
// getting all chunks of an image.
chunk_list = MuxGetChunkListFromId(mux, id); // List to add this chunk.
if (chunk_list == NULL) chunk_list = &mux->unknown_;
if (ChunkSetNth(&chunk, chunk_list, 0) != WEBP_MUX_OK) goto Err;
}
{
const size_t data_size = ChunkDiskSize(&chunk);
data += data_size;
size -= data_size;
break;
case WEBP_CHUNK_ANMF:
#ifdef WEBP_EXPERIMENTAL_FEATURES
case WEBP_CHUNK_FRGM:
#endif
if (wpi->is_partial_) goto Err; // Previous wpi is still incomplete.
if (!MuxImageParse(&chunk, copy_data, wpi)) goto Err;
ChunkRelease(&chunk);
goto PushImage;
break;
default: // A non-image chunk.
if (wpi->is_partial_) goto Err; // Encountered a non-image chunk before
// getting all chunks of an image.
chunk_list = MuxGetChunkListFromId(mux, id); // List to add this chunk.
if (chunk_list == NULL) chunk_list = &mux->unknown_;
if (ChunkSetNth(&chunk, chunk_list, 0) != WEBP_MUX_OK) goto Err;
break;
}
data += data_size;
size -= data_size;
ChunkInit(&chunk);
}
@ -192,26 +262,30 @@ WebPMux* WebPMuxCreateInternal(const WebPData* bitstream, int copy_data,
WebPMuxError WebPMuxGetFeatures(const WebPMux* mux, uint32_t* flags) {
WebPData data;
WebPMuxError err;
if (mux == NULL || flags == NULL) return WEBP_MUX_INVALID_ARGUMENT;
*flags = 0;
// Check if VP8X chunk is present.
err = MuxGet(mux, IDX_VP8X, 1, &data);
if (err == WEBP_MUX_NOT_FOUND) {
// Check if VP8/VP8L chunk is present.
err = WebPMuxGetImage(mux, &data);
WebPDataClear(&data);
return err;
} else if (err != WEBP_MUX_OK) {
return err;
if (MuxGet(mux, IDX_VP8X, 1, &data) == WEBP_MUX_OK) {
if (data.size < CHUNK_SIZE_BYTES) return WEBP_MUX_BAD_DATA;
*flags = GetLE32(data.bytes); // All OK. Fill up flags.
} else {
WebPMuxError err = MuxValidateForImage(mux); // Check for single image.
if (err != WEBP_MUX_OK) return err;
if (MuxHasLosslessImages(mux->images_)) {
const WebPData* const vp8l_data = &mux->images_->img_->data_;
int has_alpha = 0;
if (!VP8LGetInfo(vp8l_data->bytes, vp8l_data->size, NULL, NULL,
&has_alpha)) {
return WEBP_MUX_BAD_DATA;
}
if (has_alpha) {
*flags = ALPHA_FLAG;
}
}
}
if (data.size_ < CHUNK_SIZE_BYTES) return WEBP_MUX_BAD_DATA;
// All OK. Fill up flags.
*flags = GetLE32(data.bytes_);
return WEBP_MUX_OK;
}
@ -230,7 +304,7 @@ static uint8_t* EmitVP8XChunk(uint8_t* const dst, int width,
}
// Assemble a single image WebP bitstream from 'wpi'.
static WebPMuxError SynthesizeBitstream(WebPMuxImage* const wpi,
static WebPMuxError SynthesizeBitstream(const WebPMuxImage* const wpi,
WebPData* const bitstream) {
uint8_t* dst;
@ -238,7 +312,7 @@ static WebPMuxError SynthesizeBitstream(WebPMuxImage* const wpi,
const int need_vp8x = (wpi->alpha_ != NULL);
const size_t vp8x_size = need_vp8x ? CHUNK_HEADER_SIZE + VP8X_CHUNK_SIZE : 0;
const size_t alpha_size = need_vp8x ? ChunkDiskSize(wpi->alpha_) : 0;
// Note: No need to output FRM/TILE chunk for a single image.
// Note: No need to output ANMF/FRGM chunk for a single image.
const size_t size = RIFF_HEADER_SIZE + vp8x_size + alpha_size +
ChunkDiskSize(wpi->img_);
uint8_t* const data = (uint8_t*)malloc(size);
@ -265,100 +339,101 @@ static WebPMuxError SynthesizeBitstream(WebPMuxImage* const wpi,
assert(dst == data + size);
// Output.
bitstream->bytes_ = data;
bitstream->size_ = size;
bitstream->bytes = data;
bitstream->size = size;
return WEBP_MUX_OK;
}
WebPMuxError WebPMuxGetImage(const WebPMux* mux, WebPData* bitstream) {
WebPMuxError err;
WebPMuxImage* wpi = NULL;
if (mux == NULL || bitstream == NULL) {
WebPMuxError WebPMuxGetChunk(const WebPMux* mux, const char fourcc[4],
WebPData* chunk_data) {
CHUNK_INDEX idx;
if (mux == NULL || fourcc == NULL || chunk_data == NULL) {
return WEBP_MUX_INVALID_ARGUMENT;
}
err = MuxValidateForImage(mux);
if (err != WEBP_MUX_OK) return err;
// All well. Get the image.
err = MuxImageGetNth((const WebPMuxImage**)&mux->images_, 1, WEBP_CHUNK_IMAGE,
&wpi);
assert(err == WEBP_MUX_OK); // Already tested above.
return SynthesizeBitstream(wpi, bitstream);
}
WebPMuxError WebPMuxGetMetadata(const WebPMux* mux, WebPData* metadata) {
if (mux == NULL || metadata == NULL) return WEBP_MUX_INVALID_ARGUMENT;
return MuxGet(mux, IDX_META, 1, metadata);
idx = ChunkGetIndexFromFourCC(fourcc);
if (IsWPI(kChunks[idx].id)) { // An image chunk.
return WEBP_MUX_INVALID_ARGUMENT;
} else if (idx != IDX_UNKNOWN) { // A known chunk type.
return MuxGet(mux, idx, 1, chunk_data);
} else { // An unknown chunk type.
const WebPChunk* const chunk =
ChunkSearchList(mux->unknown_, 1, ChunkGetTagFromFourCC(fourcc));
if (chunk == NULL) return WEBP_MUX_NOT_FOUND;
*chunk_data = chunk->data_;
return WEBP_MUX_OK;
}
}
WebPMuxError WebPMuxGetColorProfile(const WebPMux* mux,
WebPData* color_profile) {
if (mux == NULL || color_profile == NULL) return WEBP_MUX_INVALID_ARGUMENT;
return MuxGet(mux, IDX_ICCP, 1, color_profile);
// Fills 'info' for an image that has no ANMF/FRGM header.
// The offset, duration and dispose fields get fixed defaults, 'info->id' is
// derived from the tag of wpi->img_, and the bitstream is assembled with
// SynthesizeBitstream(), whose status is returned.
static WebPMuxError MuxGetImageInternal(const WebPMuxImage* const wpi,
                                        WebPMuxFrameInfo* const info) {
  // Set some defaults for unrelated fields.
  info->x_offset = 0;
  info->y_offset = 0;
  info->duration = 1;
  // Give 'dispose_method' a defined value too, so callers never read an
  // uninitialized field; this matches the non-ANMF default used by
  // MuxGetFrameFragmentInternal().
  info->dispose_method = WEBP_MUX_DISPOSE_NONE;
  // Extract data for related fields.
  info->id = ChunkGetIdFromTag(wpi->img_->tag_);
  return SynthesizeBitstream(wpi, &info->bitstream);
}
WebPMuxError WebPMuxGetLoopCount(const WebPMux* mux, int* loop_count) {
WebPData image;
WebPMuxError err;
if (mux == NULL || loop_count == NULL) return WEBP_MUX_INVALID_ARGUMENT;
err = MuxGet(mux, IDX_LOOP, 1, &image);
if (err != WEBP_MUX_OK) return err;
if (image.size_ < kChunks[WEBP_CHUNK_LOOP].size) return WEBP_MUX_BAD_DATA;
*loop_count = GetLE16(image.bytes_);
return WEBP_MUX_OK;
// Fills 'frame' from an image that has a frame (ANMF) or fragment (FRGM)
// header chunk, then synthesizes its bitstream.
// Returns WEBP_MUX_INVALID_ARGUMENT for a fragment when
// WEBP_EXPERIMENTAL_FEATURES is not defined, WEBP_MUX_BAD_DATA when the header
// payload is smaller than the expected chunk size, and otherwise the status of
// SynthesizeBitstream().
static WebPMuxError MuxGetFrameFragmentInternal(const WebPMuxImage* const wpi,
                                                WebPMuxFrameInfo* const frame) {
  const int is_frame = (wpi->header_->tag_ == kChunks[IDX_ANMF].tag);
  const CHUNK_INDEX idx = is_frame ? IDX_ANMF : IDX_FRGM;
  const WebPData* payload;

#ifndef WEBP_EXPERIMENTAL_FEATURES
  if (!is_frame) return WEBP_MUX_INVALID_ARGUMENT;
#endif
  assert(wpi->header_ != NULL);  // Already checked by WebPMuxGetFrame().

  // Locate the header payload and make sure it is large enough to parse.
  payload = &wpi->header_->data_;
  if (payload->size < kChunks[idx].size) return WEBP_MUX_BAD_DATA;

  // Offsets: two 24-bit little-endian fields, each doubled on read.
  frame->x_offset = 2 * GetLE24(payload->bytes + 0);
  frame->y_offset = 2 * GetLE24(payload->bytes + 3);

  // Duration and dispose method are only read for frames; fragments get
  // fixed values.
  if (is_frame) {
    frame->duration = GetLE24(payload->bytes + 12);
    frame->dispose_method = (WebPMuxAnimDispose)(payload->bytes[15] & 1);
  } else {
    frame->duration = 1;
    frame->dispose_method = WEBP_MUX_DISPOSE_NONE;
  }

  frame->id = ChunkGetIdFromTag(wpi->header_->tag_);
  return SynthesizeBitstream(wpi, &frame->bitstream);
}
static WebPMuxError MuxGetFrameTileInternal(
const WebPMux* const mux, uint32_t nth, WebPData* const bitstream,
int* const x_offset, int* const y_offset, int* const duration,
uint32_t tag) {
const WebPData* frame_tile_data;
WebPMuxError WebPMuxGetFrame(
const WebPMux* mux, uint32_t nth, WebPMuxFrameInfo* frame) {
WebPMuxError err;
WebPMuxImage* wpi;
const int is_frame = (tag == kChunks[WEBP_CHUNK_FRAME].tag) ? 1 : 0;
const CHUNK_INDEX idx = is_frame ? IDX_FRAME : IDX_TILE;
const WebPChunkId id = kChunks[idx].id;
if (mux == NULL || bitstream == NULL ||
x_offset == NULL || y_offset == NULL || (is_frame && duration == NULL)) {
// Sanity checks.
if (mux == NULL || frame == NULL) {
return WEBP_MUX_INVALID_ARGUMENT;
}
// Get the nth WebPMuxImage.
err = MuxImageGetNth((const WebPMuxImage**)&mux->images_, nth, id, &wpi);
err = MuxImageGetNth((const WebPMuxImage**)&mux->images_, nth, &wpi);
if (err != WEBP_MUX_OK) return err;
// Get frame chunk.
assert(wpi->header_ != NULL); // As MuxImageGetNth() already checked header_.
frame_tile_data = &wpi->header_->data_;
// Get frame info.
if (wpi->header_ == NULL) {
return MuxGetImageInternal(wpi, frame);
} else {
return MuxGetFrameFragmentInternal(wpi, frame);
}
}
if (frame_tile_data->size_ < kChunks[idx].size) return WEBP_MUX_BAD_DATA;
*x_offset = 2 * GetLE24(frame_tile_data->bytes_ + 0);
*y_offset = 2 * GetLE24(frame_tile_data->bytes_ + 3);
if (is_frame) *duration = 1 + GetLE24(frame_tile_data->bytes_ + 12);
WebPMuxError WebPMuxGetAnimationParams(const WebPMux* mux,
WebPMuxAnimParams* params) {
WebPData anim;
WebPMuxError err;
return SynthesizeBitstream(wpi, bitstream);
}
if (mux == NULL || params == NULL) return WEBP_MUX_INVALID_ARGUMENT;
WebPMuxError WebPMuxGetFrame(const WebPMux* mux, uint32_t nth,
WebPData* bitstream,
int* x_offset, int* y_offset, int* duration) {
return MuxGetFrameTileInternal(mux, nth, bitstream, x_offset, y_offset,
duration, kChunks[IDX_FRAME].tag);
}
err = MuxGet(mux, IDX_ANIM, 1, &anim);
if (err != WEBP_MUX_OK) return err;
if (anim.size < kChunks[WEBP_CHUNK_ANIM].size) return WEBP_MUX_BAD_DATA;
params->bgcolor = GetLE32(anim.bytes);
params->loop_count = GetLE16(anim.bytes + 4);
WebPMuxError WebPMuxGetTile(const WebPMux* mux, uint32_t nth,
WebPData* bitstream,
int* x_offset, int* y_offset) {
return MuxGetFrameTileInternal(mux, nth, bitstream, x_offset, y_offset, NULL,
kChunks[IDX_TILE].tag);
return WEBP_MUX_OK;
}
// Get chunk index from chunk id. Returns IDX_NIL if not found.

@ -15,7 +15,11 @@
extern "C" {
#endif
#define MK(X) (((bit_t)(X) << (BITS)) | (MASK))
#ifndef USE_RIGHT_JUSTIFY
#define MK(X) (((range_t)(X) << (BITS)) | (MASK))
#else
#define MK(X) ((range_t)(X))
#endif
//------------------------------------------------------------------------------
// VP8BitReader
@ -29,7 +33,7 @@ void VP8InitBitReader(VP8BitReader* const br,
br->buf_ = start;
br->buf_end_ = end;
br->value_ = 0;
br->missing_ = 8; // to load the very first 8bits
br->bits_ = -8; // to load the very first 8bits
br->eof_ = 0;
}
@ -46,7 +50,7 @@ const uint8_t kVP8Log2Range[128] = {
};
// range = (range << kVP8Log2Range[range]) + trailing 1's
const bit_t kVP8NewRange[128] = {
const range_t kVP8NewRange[128] = {
MK(127), MK(127), MK(191), MK(127), MK(159), MK(191), MK(223), MK(127),
MK(143), MK(159), MK(175), MK(191), MK(207), MK(223), MK(239), MK(127),
MK(135), MK(143), MK(151), MK(159), MK(167), MK(175), MK(183), MK(191),
@ -71,9 +75,19 @@ void VP8LoadFinalBytes(VP8BitReader* const br) {
assert(br != NULL && br->buf_ != NULL);
// Only read 8bits at a time
if (br->buf_ < br->buf_end_) {
br->value_ |= (bit_t)(*br->buf_++) << ((BITS) - 8 + br->missing_);
br->missing_ -= 8;
} else {
#ifndef USE_RIGHT_JUSTIFY
br->value_ |= (bit_t)(*br->buf_++) << ((BITS) - 8 - br->bits_);
#else
br->value_ = (bit_t)(*br->buf_++) | (br->value_ << 8);
#endif
br->bits_ += 8;
} else if (!br->eof_) {
#ifdef USE_RIGHT_JUSTIFY
// These are not strictly needed, but it makes the behaviour
// consistent for both USE_RIGHT_JUSTIFY and !USE_RIGHT_JUSTIFY.
br->value_ <<= 8;
br->bits_ += 8;
#endif
br->eof_ = 1;
}
}
@ -99,6 +113,10 @@ int32_t VP8GetSignedValue(VP8BitReader* const br, int bits) {
#define MAX_NUM_BIT_READ 25
#define LBITS 64 // Number of bits prefetched.
#define WBITS 32 // Minimum number of bytes needed after VP8LFillBitWindow.
#define LOG8_WBITS 4 // Number of bytes needed to store WBITS bits.
static const uint32_t kBitMask[MAX_NUM_BIT_READ] = {
0, 1, 3, 7, 15, 31, 63, 127, 255, 511, 1023, 2047, 4095, 8191, 16383, 32767,
65535, 131071, 262143, 524287, 1048575, 2097151, 4194303, 8388607, 16777215
@ -120,7 +138,7 @@ void VP8LInitBitReader(VP8LBitReader* const br,
br->eos_ = 0;
br->error_ = 0;
for (i = 0; i < sizeof(br->val_) && i < br->len_; ++i) {
br->val_ |= ((uint64_t)br->buf_[br->pos_]) << (8 * i);
br->val_ |= ((vp8l_val_t)br->buf_[br->pos_]) << (8 * i);
++br->pos_;
}
}
@ -135,91 +153,56 @@ void VP8LBitReaderSetBuffer(VP8LBitReader* const br,
br->len_ = len;
}
// If not at EOS, reload up to LBITS byte-by-byte
static void ShiftBytes(VP8LBitReader* const br) {
while (br->bit_pos_ >= 8 && br->pos_ < br->len_) {
br->val_ >>= 8;
br->val_ |= ((uint64_t)br->buf_[br->pos_]) << 56;
br->val_ |= ((vp8l_val_t)br->buf_[br->pos_]) << (LBITS - 8);
++br->pos_;
br->bit_pos_ -= 8;
}
}
void VP8LFillBitWindow(VP8LBitReader* const br) {
if (br->bit_pos_ >= 32) {
#if defined(__x86_64__) || defined(_M_X64)
if (br->pos_ + 8 < br->len_) {
br->val_ >>= 32;
if (br->bit_pos_ >= WBITS) {
#if (defined(__x86_64__) || defined(_M_X64))
if (br->pos_ + sizeof(br->val_) < br->len_) {
br->val_ >>= WBITS;
br->bit_pos_ -= WBITS;
// The expression below needs a little-endian arch to work correctly.
// This gives a large speedup for decoding speed.
br->val_ |= *(const uint64_t *)(br->buf_ + br->pos_) << 32;
br->pos_ += 4;
br->bit_pos_ -= 32;
} else {
// Slow path.
ShiftBytes(br);
br->val_ |= *(const vp8l_val_t*)(br->buf_ + br->pos_) << (LBITS - WBITS);
br->pos_ += LOG8_WBITS;
return;
}
#else
// Always the slow path.
ShiftBytes(br);
#endif
}
if (br->pos_ == br->len_ && br->bit_pos_ == 64) {
br->eos_ = 1;
}
}
uint32_t VP8LReadOneBit(VP8LBitReader* const br) {
const uint32_t val = (br->val_ >> br->bit_pos_) & 1;
// Flag an error at end_of_stream.
if (!br->eos_) {
++br->bit_pos_;
if (br->bit_pos_ >= 32) {
ShiftBytes(br);
}
// After this last bit is read, check if eos needs to be flagged.
if (br->pos_ == br->len_ && br->bit_pos_ == 64) {
ShiftBytes(br); // Slow path.
if (br->pos_ == br->len_ && br->bit_pos_ == LBITS) {
br->eos_ = 1;
}
} else {
br->error_ = 1;
}
return val;
}
uint32_t VP8LReadBits(VP8LBitReader* const br, int n_bits) {
uint32_t val = 0;
assert(n_bits >= 0);
// Flag an error if end_of_stream or n_bits is more than allowed limit.
if (!br->eos_ && n_bits < MAX_NUM_BIT_READ) {
const uint32_t val =
(uint32_t)(br->val_ >> br->bit_pos_) & kBitMask[n_bits];
const int new_bits = br->bit_pos_ + n_bits;
br->bit_pos_ = new_bits;
// If this read is going to cross the read buffer, set the eos flag.
if (br->pos_ == br->len_) {
if ((br->bit_pos_ + n_bits) >= 64) {
if (new_bits >= LBITS) {
br->eos_ = 1;
if ((br->bit_pos_ + n_bits) > 64) return val;
}
}
val = (br->val_ >> br->bit_pos_) & kBitMask[n_bits];
br->bit_pos_ += n_bits;
if (br->bit_pos_ >= 40) {
if (br->pos_ + 5 < br->len_) {
br->val_ >>= 40;
br->val_ |=
(((uint64_t)br->buf_[br->pos_ + 0]) << 24) |
(((uint64_t)br->buf_[br->pos_ + 1]) << 32) |
(((uint64_t)br->buf_[br->pos_ + 2]) << 40) |
(((uint64_t)br->buf_[br->pos_ + 3]) << 48) |
(((uint64_t)br->buf_[br->pos_ + 4]) << 56);
br->pos_ += 5;
br->bit_pos_ -= 40;
}
if (br->bit_pos_ >= 8) {
ShiftBytes(br);
}
}
ShiftBytes(br);
return val;
} else {
br->error_ = 1;
return 0;
}
return val;
}
//------------------------------------------------------------------------------

@ -24,11 +24,80 @@
extern "C" {
#endif
#define BITS 32 // can be 32, 16 or 8
#define MASK ((((bit_t)1) << (BITS)) - 1)
#if (BITS == 32)
typedef uint64_t bit_t; // natural register type
typedef uint32_t lbit_t; // natural type for memory I/O
// The Boolean decoder needs to maintain infinite precision on the value_ field.
// However, since range_ is only 8bit, we only need an active window of 8 bits
// for value_. Left bits (MSB) get zeroed and shifted away when value_ falls
// below 128, range_ is updated, and fresh bits read from the bitstream are
// brought in as LSB.
// To avoid reading the fresh bits one by one (slow), we cache a few of them
// ahead (actually, we cache BITS of them ahead. See below). There are two
// strategies regarding how to shift these looked-ahead fresh bits into the
// 8bit window of value_: either we shift them in, while keeping the position of
// the window fixed. Or we slide the window to the right while keeping the cache
// bits at a fixed, right-justified, position.
//
// Example, for BITS=16: here is the content of value_ for both strategies:
//
// !USE_RIGHT_JUSTIFY || USE_RIGHT_JUSTIFY
// ||
// <- 8b -><- 8b -><- BITS bits -> || <- 8b+3b -><- 8b -><- 13 bits ->
// [unused][value_][cached bits][0] || [unused...][value_][cached bits]
// [........00vvvvvvBBBBBBBBBBBBB000]LSB || [...........00vvvvvvBBBBBBBBBBBBB]
// ||
// After calling VP8Shift(), where we need to shift away two zeros:
// [........vvvvvvvvBBBBBBBBBBB00000]LSB || [.............vvvvvvvvBBBBBBBBBBB]
// ||
// Just before we need to call VP8LoadNewBytes(), the situation is:
// [........vvvvvv000000000000000000]LSB || [..........................vvvvvv]
// ||
// And just after calling VP8LoadNewBytes():
// [........vvvvvvvvBBBBBBBBBBBBBBBB]LSB || [........vvvvvvvvBBBBBBBBBBBBBBBB]
//
// -> we're back to eight active 'value_' bits (marked 'v') and BITS cached
// bits (marked 'B')
//
// The right-justify strategy tends to use fewer shifts and is often faster.
//------------------------------------------------------------------------------
// BITS can be any multiple of 8 from 8 to 56 (inclusive).
// Pick values that fit natural register size.
#if !defined(WEBP_REFERENCE_IMPLEMENTATION)
#define USE_RIGHT_JUSTIFY
#if defined(__i386__) || defined(_M_IX86) // x86 32bit
#define BITS 16
#elif defined(__x86_64__) || defined(_M_X64) // x86 64bit
#define BITS 56
#elif defined(__arm__) || defined(_M_ARM) // ARM
#define BITS 24
#else // reasonable default
#define BITS 24
#endif
#else // reference choices
#define USE_RIGHT_JUSTIFY
#define BITS 8
#endif
//------------------------------------------------------------------------------
// Derived types and constants
// bit_t = natural register type
// lbit_t = natural type for memory I/O
#if (BITS > 32)
typedef uint64_t bit_t;
typedef uint64_t lbit_t;
#elif (BITS == 32)
typedef uint64_t bit_t;
typedef uint32_t lbit_t;
#elif (BITS == 24)
typedef uint32_t bit_t;
typedef uint32_t lbit_t;
#elif (BITS == 16)
typedef uint32_t bit_t;
typedef uint16_t lbit_t;
@ -37,8 +106,15 @@ typedef uint32_t bit_t;
typedef uint8_t lbit_t;
#endif
#ifndef USE_RIGHT_JUSTIFY
typedef bit_t range_t; // type for storing range_
#define MASK ((((bit_t)1) << (BITS)) - 1)
#else
typedef uint32_t range_t; // range_ only uses 8bits here. No need for bit_t.
#endif
//------------------------------------------------------------------------------
// Bitreader and code-tree reader
// Bitreader
typedef struct VP8BitReader VP8BitReader;
struct VP8BitReader {
@ -47,9 +123,9 @@ struct VP8BitReader {
int eof_; // true if input is exhausted
// boolean decoder
bit_t range_; // current range minus 1. In [127, 254] interval.
bit_t value_; // current value
int missing_; // number of missing bits in value_ (8bit)
range_t range_; // current range minus 1. In [127, 254] interval.
bit_t value_; // current value
int bits_; // number of valid bits left
};
// Initialize the bit reader and the boolean decoder.
@ -67,12 +143,12 @@ int32_t VP8GetSignedValue(VP8BitReader* const br, int num_bits);
// Read a bit with proba 'prob'. Speed-critical function!
extern const uint8_t kVP8Log2Range[128];
extern const bit_t kVP8NewRange[128];
extern const range_t kVP8NewRange[128];
void VP8LoadFinalBytes(VP8BitReader* const br); // special case for the tail
static WEBP_INLINE void VP8LoadNewBytes(VP8BitReader* const br) {
assert(br && br->buf_);
assert(br != NULL && br->buf_ != NULL);
// Read 'BITS' bits at a time if possible.
if (br->buf_ + sizeof(lbit_t) <= br->buf_end_) {
// convert memory type to register type (with some zero'ing!)
@ -80,70 +156,124 @@ static WEBP_INLINE void VP8LoadNewBytes(VP8BitReader* const br) {
lbit_t in_bits = *(lbit_t*)br->buf_;
br->buf_ += (BITS) >> 3;
#if !defined(__BIG_ENDIAN__)
#if (BITS == 32)
#if (BITS > 32)
// gcc 4.3 has builtin functions for swap32/swap64
#if defined(__GNUC__) && \
(__GNUC__ > 4 || (__GNUC__ == 4 && __GNUC_MINOR__ >= 3))
bits = (bit_t)__builtin_bswap64(in_bits);
#elif defined(_MSC_VER)
bits = (bit_t)_byteswap_uint64(in_bits);
#elif defined(__x86_64__)
__asm__ volatile("bswapq %0" : "=r"(bits) : "0"(in_bits));
#else // generic code for swapping 64-bit values (suggested by bdb@)
bits = (bit_t)in_bits;
bits = ((bits & 0xffffffff00000000ull) >> 32) |
((bits & 0x00000000ffffffffull) << 32);
bits = ((bits & 0xffff0000ffff0000ull) >> 16) |
((bits & 0x0000ffff0000ffffull) << 16);
bits = ((bits & 0xff00ff00ff00ff00ull) >> 8) |
((bits & 0x00ff00ff00ff00ffull) << 8);
#endif
bits >>= 64 - BITS;
#elif (BITS >= 24)
#if defined(__i386__) || defined(__x86_64__)
__asm__ volatile("bswap %k0" : "=r"(in_bits) : "0"(in_bits));
bits = (bit_t)in_bits; // 32b -> 64b zero-extension
bits = (bit_t)in_bits; // 24b/32b -> 32b/64b zero-extension
#elif defined(_MSC_VER)
bits = _byteswap_ulong(in_bits);
bits = (bit_t)_byteswap_ulong(in_bits);
#else
bits = (bit_t)(in_bits >> 24) | ((in_bits >> 8) & 0xff00)
| ((in_bits << 8) & 0xff0000) | (in_bits << 24);
#endif // x86
bits >>= (32 - BITS);
#elif (BITS == 16)
// gcc will recognize a 'rorw $8, ...' here:
bits = (bit_t)(in_bits >> 8) | ((in_bits & 0xff) << 8);
#else // BITS == 8
bits = (bit_t)in_bits;
#endif
#else // LITTLE_ENDIAN
#else // BIG_ENDIAN
bits = (bit_t)in_bits;
#endif
br->value_ |= bits << br->missing_;
br->missing_ -= (BITS);
#ifndef USE_RIGHT_JUSTIFY
br->value_ |= bits << (-br->bits_);
#else
br->value_ = bits | (br->value_ << (BITS));
#endif
br->bits_ += (BITS);
} else {
VP8LoadFinalBytes(br); // no need to be inlined
}
}
static WEBP_INLINE int VP8BitUpdate(VP8BitReader* const br, bit_t split) {
const bit_t value_split = split | (MASK);
if (br->missing_ > 0) { // Make sure we have a least BITS bits in 'value_'
static WEBP_INLINE int VP8BitUpdate(VP8BitReader* const br, range_t split) {
if (br->bits_ < 0) { // Make sure we have a least BITS bits in 'value_'
VP8LoadNewBytes(br);
}
if (br->value_ > value_split) {
br->range_ -= value_split + 1;
br->value_ -= value_split + 1;
#ifndef USE_RIGHT_JUSTIFY
split |= (MASK);
if (br->value_ > split) {
br->range_ -= split + 1;
br->value_ -= split + 1;
return 1;
} else {
br->range_ = value_split;
br->range_ = split;
return 0;
}
#else
{
const int pos = br->bits_;
const range_t value = (range_t)(br->value_ >> pos);
if (value > split) {
br->range_ -= split + 1;
br->value_ -= (bit_t)(split + 1) << pos;
return 1;
} else {
br->range_ = split;
return 0;
}
}
#endif
}
static WEBP_INLINE void VP8Shift(VP8BitReader* const br) {
#ifndef USE_RIGHT_JUSTIFY
// range_ is in [0..127] interval here.
const int idx = br->range_ >> (BITS);
const bit_t idx = br->range_ >> (BITS);
const int shift = kVP8Log2Range[idx];
br->range_ = kVP8NewRange[idx];
br->value_ <<= shift;
br->missing_ += shift;
br->bits_ -= shift;
#else
const int shift = kVP8Log2Range[br->range_];
assert(br->range_ < (range_t)128);
br->range_ = kVP8NewRange[br->range_];
br->bits_ -= shift;
#endif
}
static WEBP_INLINE int VP8GetBit(VP8BitReader* const br, int prob) {
#ifndef USE_RIGHT_JUSTIFY
// It's important to avoid generating a 64bit x 64bit multiply here.
// We just need an 8b x 8b after all.
const bit_t split =
(bit_t)((uint32_t)(br->range_ >> (BITS)) * prob) << ((BITS) - 8);
const range_t split =
(range_t)((uint32_t)(br->range_ >> (BITS)) * prob) << ((BITS) - 8);
const int bit = VP8BitUpdate(br, split);
if (br->range_ <= (((range_t)0x7e << (BITS)) | (MASK))) {
VP8Shift(br);
}
return bit;
#else
const range_t split = (br->range_ * prob) >> 8;
const int bit = VP8BitUpdate(br, split);
if (br->range_ <= (((bit_t)0x7e << (BITS)) | (MASK))) {
if (br->range_ <= (range_t)0x7e) {
VP8Shift(br);
}
return bit;
#endif
}
static WEBP_INLINE int VP8GetSigned(VP8BitReader* const br, int v) {
const bit_t split = (br->range_ >> 1);
const range_t split = (br->range_ >> 1);
const int bit = VP8BitUpdate(br, split);
VP8Shift(br);
return bit ? -v : v;
@ -151,16 +281,18 @@ static WEBP_INLINE int VP8GetSigned(VP8BitReader* const br, int v) {
// -----------------------------------------------------------------------------
// Bitreader
// Bitreader for lossless format
typedef uint64_t vp8l_val_t; // right now, this bit-reader can only use 64bit.
typedef struct {
uint64_t val_;
const uint8_t* buf_;
size_t len_;
size_t pos_;
int bit_pos_;
int eos_;
int error_;
vp8l_val_t val_; // pre-fetched bits
const uint8_t* buf_; // input byte buffer
size_t len_; // buffer length
size_t pos_; // byte position in buf_
int bit_pos_; // current bit-reading position in val_
int eos_; // bitstream is finished
int error_; // an error occurred (buffer overflow attempt...)
} VP8LBitReader;
void VP8LInitBitReader(VP8LBitReader* const br,
@ -176,17 +308,14 @@ void VP8LBitReaderSetBuffer(VP8LBitReader* const br,
// Flags eos if this read attempt is going to cross the read buffer.
uint32_t VP8LReadBits(VP8LBitReader* const br, int n_bits);
// Reads one bit from Read Buffer. Flags an error in case end_of_stream.
// Flags eos after reading last bit from the buffer.
uint32_t VP8LReadOneBit(VP8LBitReader* const br);
// VP8LReadOneBitUnsafe is faster than VP8LReadOneBit, but it can be called only
// 32 times after the last VP8LFillBitWindow. Any subsequent calls
// (without VP8LFillBitWindow) will return invalid data.
static WEBP_INLINE uint32_t VP8LReadOneBitUnsafe(VP8LBitReader* const br) {
const uint32_t val = (br->val_ >> br->bit_pos_) & 1;
++br->bit_pos_;
return val;
// Return the prefetched bits, so they can be looked up.
// The low 32 bits of the window, starting at the current bit position, are
// returned; the read position itself is not advanced.
static WEBP_INLINE uint32_t VP8LPrefetchBits(VP8LBitReader* const br) {
  // bit_pos_ may reach the full width of val_ once the window is exhausted.
  // Shifting by an amount >= the operand width is undefined behaviour in C,
  // so the shift count is masked to stay within [0, width - 1].
  const int shift_mask = (int)(8 * sizeof(br->val_)) - 1;
  return (uint32_t)(br->val_ >> (br->bit_pos_ & shift_mask));
}
// Skip 'num_bits' bits of the prefetched window by moving the bit-reading
// position forward. No bounds check or refill is performed here.
static WEBP_INLINE void VP8LDiscardBits(VP8LBitReader* const br, int num_bits) {
  const int advanced_pos = br->bit_pos_ + num_bits;
  br->bit_pos_ = advanced_pos;
}
// Advances the Read buffer by 4 bytes to make room for reading next 32 bits.

@ -26,8 +26,7 @@ extern "C" {
assert(out != NULL); \
assert(width > 0); \
assert(height > 0); \
assert(bpp > 0); \
assert(stride >= width * bpp);
assert(stride >= width);
static WEBP_INLINE void PredictLine(const uint8_t* src, const uint8_t* pred,
uint8_t* dst, int length, int inverse) {
@ -43,7 +42,8 @@ static WEBP_INLINE void PredictLine(const uint8_t* src, const uint8_t* pred,
// Horizontal filter.
static WEBP_INLINE void DoHorizontalFilter(const uint8_t* in,
int width, int height, int bpp, int stride, int inverse, uint8_t* out) {
int width, int height, int stride,
int inverse, uint8_t* out) {
int h;
const uint8_t* preds = (inverse ? out : in);
SANITY_CHECK(in, out);
@ -52,11 +52,11 @@ static WEBP_INLINE void DoHorizontalFilter(const uint8_t* in,
for (h = 0; h < height; ++h) {
// Leftmost pixel is predicted from above (except for topmost scanline).
if (h == 0) {
memcpy((void*)out, (const void*)in, bpp);
out[0] = in[0];
} else {
PredictLine(in, preds - stride, out, bpp, inverse);
PredictLine(in, preds - stride, out, 1, inverse);
}
PredictLine(in + bpp, preds, out + bpp, bpp * (width - 1), inverse);
PredictLine(in + 1, preds, out + 1, width - 1, inverse);
preds += stride;
in += stride;
out += stride;
@ -64,46 +64,46 @@ static WEBP_INLINE void DoHorizontalFilter(const uint8_t* in,
}
static void HorizontalFilter(const uint8_t* data, int width, int height,
int bpp, int stride, uint8_t* filtered_data) {
DoHorizontalFilter(data, width, height, bpp, stride, 0, filtered_data);
int stride, uint8_t* filtered_data) {
DoHorizontalFilter(data, width, height, stride, 0, filtered_data);
}
static void HorizontalUnfilter(const uint8_t* data, int width, int height,
int bpp, int stride, uint8_t* recon_data) {
DoHorizontalFilter(data, width, height, bpp, stride, 1, recon_data);
static void HorizontalUnfilter(int width, int height, int stride,
uint8_t* data) {
DoHorizontalFilter(data, width, height, stride, 1, data);
}
//------------------------------------------------------------------------------
// Vertical filter.
static WEBP_INLINE void DoVerticalFilter(const uint8_t* in,
int width, int height, int bpp, int stride, int inverse, uint8_t* out) {
int width, int height, int stride,
int inverse, uint8_t* out) {
int h;
const uint8_t* preds = (inverse ? out : in);
SANITY_CHECK(in, out);
// Very first top-left pixel is copied.
memcpy((void*)out, (const void*)in, bpp);
out[0] = in[0];
// Rest of top scan-line is left-predicted.
PredictLine(in + bpp, preds, out + bpp, bpp * (width - 1), inverse);
PredictLine(in + 1, preds, out + 1, width - 1, inverse);
// Filter line-by-line.
for (h = 1; h < height; ++h) {
in += stride;
out += stride;
PredictLine(in, preds, out, bpp * width, inverse);
PredictLine(in, preds, out, width, inverse);
preds += stride;
}
}
static void VerticalFilter(const uint8_t* data, int width, int height,
int bpp, int stride, uint8_t* filtered_data) {
DoVerticalFilter(data, width, height, bpp, stride, 0, filtered_data);
int stride, uint8_t* filtered_data) {
DoVerticalFilter(data, width, height, stride, 0, filtered_data);
}
static void VerticalUnfilter(const uint8_t* data, int width, int height,
int bpp, int stride, uint8_t* recon_data) {
DoVerticalFilter(data, width, height, bpp, stride, 1, recon_data);
static void VerticalUnfilter(int width, int height, int stride, uint8_t* data) {
DoVerticalFilter(data, width, height, stride, 1, data);
}
//------------------------------------------------------------------------------
@ -111,19 +111,19 @@ static void VerticalUnfilter(const uint8_t* data, int width, int height,
static WEBP_INLINE int GradientPredictor(uint8_t a, uint8_t b, uint8_t c) {
const int g = a + b - c;
return (g < 0) ? 0 : (g > 255) ? 255 : g;
return ((g & ~0xff) == 0) ? g : (g < 0) ? 0 : 255; // clip to 8bit
}
static WEBP_INLINE
void DoGradientFilter(const uint8_t* in, int width, int height,
int bpp, int stride, int inverse, uint8_t* out) {
int stride, int inverse, uint8_t* out) {
const uint8_t* preds = (inverse ? out : in);
int h;
SANITY_CHECK(in, out);
// left prediction for top scan-line
memcpy((void*)out, (const void*)in, bpp);
PredictLine(in + bpp, preds, out + bpp, bpp * (width - 1), inverse);
out[0] = in[0];
PredictLine(in + 1, preds, out + 1, width - 1, inverse);
// Filter line-by-line.
for (h = 1; h < height; ++h) {
@ -132,24 +132,23 @@ void DoGradientFilter(const uint8_t* in, int width, int height,
in += stride;
out += stride;
// leftmost pixel: predict from above.
PredictLine(in, preds - stride, out, bpp, inverse);
for (w = bpp; w < width * bpp; ++w) {
const int pred = GradientPredictor(preds[w - bpp],
PredictLine(in, preds - stride, out, 1, inverse);
for (w = 1; w < width; ++w) {
const int pred = GradientPredictor(preds[w - 1],
preds[w - stride],
preds[w - stride - bpp]);
preds[w - stride - 1]);
out[w] = in[w] + (inverse ? pred : -pred);
}
}
}
static void GradientFilter(const uint8_t* data, int width, int height,
int bpp, int stride, uint8_t* filtered_data) {
DoGradientFilter(data, width, height, bpp, stride, 0, filtered_data);
int stride, uint8_t* filtered_data) {
DoGradientFilter(data, width, height, stride, 0, filtered_data);
}
static void GradientUnfilter(const uint8_t* data, int width, int height,
int bpp, int stride, uint8_t* recon_data) {
DoGradientFilter(data, width, height, bpp, stride, 1, recon_data);
static void GradientUnfilter(int width, int height, int stride, uint8_t* data) {
DoGradientFilter(data, width, height, stride, 1, data);
}
#undef SANITY_CHECK
@ -215,7 +214,7 @@ const WebPFilterFunc WebPFilters[WEBP_FILTER_LAST] = {
GradientFilter // WEBP_FILTER_GRADIENT
};
const WebPFilterFunc WebPUnfilters[WEBP_FILTER_LAST] = {
const WebPUnfilterFunc WebPUnfilters[WEBP_FILTER_LAST] = {
NULL, // WEBP_FILTER_NONE
HorizontalUnfilter, // WEBP_FILTER_HORIZONTAL
VerticalUnfilter, // WEBP_FILTER_VERTICAL

@ -30,18 +30,19 @@ typedef enum {
} WEBP_FILTER_TYPE;
typedef void (*WebPFilterFunc)(const uint8_t* in, int width, int height,
int bpp, int stride, uint8_t* out);
int stride, uint8_t* out);
typedef void (*WebPUnfilterFunc)(int width, int height, int stride,
uint8_t* data);
// Filter the given data using the given predictor.
// 'in' corresponds to a 2-dimensional pixel array of size (stride * height)
// in raster order.
// 'bpp' is number of bytes per pixel, and
// 'stride' is number of bytes per scan line (with possible padding).
// 'out' should be pre-allocated.
extern const WebPFilterFunc WebPFilters[WEBP_FILTER_LAST];
// Reconstruct the original data from the given filtered data.
extern const WebPFilterFunc WebPUnfilters[WEBP_FILTER_LAST];
// In-place reconstruct the original data from the given filtered data.
extern const WebPUnfilterFunc WebPUnfilters[WEBP_FILTER_LAST];
// Fast estimate of a potentially good filter.
extern WEBP_FILTER_TYPE EstimateBestFilter(const uint8_t* data,

@ -138,13 +138,8 @@ static int CompareHuffmanTrees(const void* ptr1, const void* ptr2) {
} else if (t1->total_count_ < t2->total_count_) {
return 1;
} else {
if (t1->value_ < t2->value_) {
return -1;
}
if (t1->value_ > t2->value_) {
return 1;
}
return 0;
assert(t1->value_ != t2->value_);
return (t1->value_ < t2->value_) ? -1 : 1;
}
}
@ -193,6 +188,10 @@ static int GenerateOptimalTree(const int* const histogram, int histogram_size,
}
}
if (tree_size_orig == 0) { // pretty optimal already!
return 1;
}
// 3 * tree_size is enough to cover all the nodes representing a
// population and all the inserted nodes combining two existing nodes.
// The tree pool needs 2 * (tree_size_orig - 1) entities, and the
@ -234,7 +233,7 @@ static int GenerateOptimalTree(const int* const histogram, int histogram_size,
tree_pool[tree_pool_size++] = tree[tree_size - 1];
tree_pool[tree_pool_size++] = tree[tree_size - 2];
count = tree_pool[tree_pool_size - 1].total_count_ +
tree_pool[tree_pool_size - 2].total_count_;
tree_pool[tree_pool_size - 2].total_count_;
tree_size -= 2;
{
// Search for the insertion point.

@ -140,15 +140,6 @@ int QuantizeLevels(uint8_t* const data, int width, int height,
return 1;
}
// Validates the arguments of the (not yet implemented) de-quantization pass.
// Returns 1 when 'data' is non-NULL and both dimensions are positive,
// 0 otherwise. The buffer is left untouched.
int DequantizeLevels(uint8_t* const data, int width, int height) {
  const int args_ok = (data != NULL) && (width > 0) && (height > 0);
  // TODO(skal): implement gradient smoothing.
  return args_ok;
}
#if defined(__cplusplus) || defined(c_plusplus)
} // extern "C"
#endif

@ -27,11 +27,6 @@ extern "C" {
int QuantizeLevels(uint8_t* const data, int width, int height, int num_levels,
uint64_t* const sse);
// Apply post-processing to input 'data' of size 'width'x'height' assuming
// that the source was quantized to a reduced number of levels.
// Returns false in case of error (data is NULL, invalid parameters, ...).
int DequantizeLevels(uint8_t* const data, int width, int height);
#if defined(__cplusplus) || defined(c_plusplus)
} // extern "C"
#endif

@ -0,0 +1,28 @@
// Copyright 2013 Google Inc. All Rights Reserved.
//
// This code is licensed under the same terms as WebM:
// Software License Agreement: http://www.webmproject.org/license/software/
// Additional IP Rights Grant: http://www.webmproject.org/license/additional/
// -----------------------------------------------------------------------------
//
// TODO(skal): implement gradient smoothing.
//
// Author: Skal (pascal.massimino@gmail.com)
#include "./quant_levels_dec.h"
#if defined(__cplusplus) || defined(c_plusplus)
extern "C" {
#endif
// Placeholder post-processing step: only the arguments are checked and the
// pixel data is not modified. Returns 0 for a NULL buffer or a non-positive
// dimension, 1 otherwise.
int DequantizeLevels(uint8_t* const data, int width, int height) {
  if (data != NULL && width > 0 && height > 0) {
    return 1;
  }
  return 0;
}
#if defined(__cplusplus) || defined(c_plusplus)
} // extern "C"
#endif

@ -0,0 +1,30 @@
// Copyright 2013 Google Inc. All Rights Reserved.
//
// This code is licensed under the same terms as WebM:
// Software License Agreement: http://www.webmproject.org/license/software/
// Additional IP Rights Grant: http://www.webmproject.org/license/additional/
// -----------------------------------------------------------------------------
//
// Alpha plane de-quantization utility
//
// Author: Vikas Arora (vikasa@google.com)
#ifndef WEBP_UTILS_QUANT_LEVELS_DEC_H_
#define WEBP_UTILS_QUANT_LEVELS_DEC_H_
#include "../webp/types.h"
#if defined(__cplusplus) || defined(c_plusplus)
extern "C" {
#endif
// Apply post-processing to input 'data' of size 'width'x'height' assuming
// that the source was quantized to a reduced number of levels.
// Returns false in case of error (data is NULL, invalid parameters, ...).
int DequantizeLevels(uint8_t* const data, int width, int height);
#if defined(__cplusplus) || defined(c_plusplus)
} // extern "C"
#endif
#endif /* WEBP_UTILS_QUANT_LEVELS_DEC_H_ */

@ -20,7 +20,7 @@ extern "C" {
#endif
#define RFIX 30
#define MULT_FIX(x,y) (((int64_t)(x) * (y) + (1 << (RFIX - 1))) >> RFIX)
#define MULT_FIX(x, y) (((int64_t)(x) * (y) + (1 << (RFIX - 1))) >> RFIX)
void WebPRescalerInit(WebPRescaler* const wrk, int src_width, int src_height,
uint8_t* const dst, int dst_width, int dst_height,

@ -9,10 +9,6 @@
//
// Author: Skal (pascal.massimino@gmail.com)
#ifdef HAVE_CONFIG_H
#include "config.h"
#endif
#include <assert.h>
#include <string.h> // for memset()
#include "./thread.h"

@ -12,6 +12,10 @@
#ifndef WEBP_UTILS_THREAD_H_
#define WEBP_UTILS_THREAD_H_
#ifdef HAVE_CONFIG_H
#include "config.h"
#endif
#if defined(__cplusplus) || defined(c_plusplus)
extern "C" {
#endif
@ -63,13 +67,13 @@ typedef struct {
// Must be called first, before any other method.
void WebPWorkerInit(WebPWorker* const worker);
// Must be called initialize the object and spawn the thread. Re-entrant.
// Must be called to initialize the object and spawn the thread. Re-entrant.
// Will potentially launch the thread. Returns false in case of error.
int WebPWorkerReset(WebPWorker* const worker);
// Make sure the previous work is finished. Returns true if worker->had_error
// was not set and not error condition was triggered by the working thread.
// Makes sure the previous work is finished. Returns true if worker->had_error
// was not set and no error condition was triggered by the working thread.
int WebPWorkerSync(WebPWorker* const worker);
// Trigger the thread to call hook() with data1 and data2 argument. These
// Triggers the thread to call hook() with data1 and data2 argument. These
// hook/data1/data2 can be changed at any time before calling this function,
// but not be changed afterward until the next call to WebPWorkerSync().
void WebPWorkerLaunch(WebPWorker* const worker);

@ -19,7 +19,8 @@ extern "C" {
//------------------------------------------------------------------------------
// Checked memory allocation
static int CheckSizeArguments(uint64_t nmemb, size_t size) {
// Returns 0 in case of overflow of nmemb * size.
static int CheckSizeArgumentsOverflow(uint64_t nmemb, size_t size) {
const uint64_t total_size = nmemb * size;
if (nmemb == 0) return 1;
if ((uint64_t)size > WEBP_MAX_ALLOCABLE_MEMORY / nmemb) return 0;
@ -28,12 +29,14 @@ static int CheckSizeArguments(uint64_t nmemb, size_t size) {
}
void* WebPSafeMalloc(uint64_t nmemb, size_t size) {
if (!CheckSizeArguments(nmemb, size)) return NULL;
if (!CheckSizeArgumentsOverflow(nmemb, size)) return NULL;
assert(nmemb * size > 0);
return malloc((size_t)(nmemb * size));
}
void* WebPSafeCalloc(uint64_t nmemb, size_t size) {
if (!CheckSizeArguments(nmemb, size)) return NULL;
if (!CheckSizeArgumentsOverflow(nmemb, size)) return NULL;
assert(nmemb * size > 0);
return calloc((size_t)nmemb, size);
}

@ -7,11 +7,14 @@
//
// Misc. common utility functions
//
// Author: Skal (pascal.massimino@gmail.com)
// Authors: Skal (pascal.massimino@gmail.com)
// Urvang (urvang@google.com)
#ifndef WEBP_UTILS_UTILS_H_
#define WEBP_UTILS_UTILS_H_
#include <assert.h>
#include "../webp/types.h"
#if defined(__cplusplus) || defined(c_plusplus)
@ -35,6 +38,40 @@ void* WebPSafeMalloc(uint64_t nmemb, size_t size);
// in order to favor the "calloc(num_foo, sizeof(foo))" pattern.
void* WebPSafeCalloc(uint64_t nmemb, size_t size);
//------------------------------------------------------------------------------
// Reading/writing data.
// Read 16, 24 or 32 bits stored in little-endian order.
// Each helper reads exactly 2, 3 or 4 bytes starting at 'data'; the caller
// must guarantee that many bytes are readable.

// Returns data[0] | data[1] << 8, a value in [0, 0xffff].
static WEBP_INLINE int GetLE16(const uint8_t* const data) {
  return (int)(data[0] << 0) | (data[1] << 8);
}

// Returns the 24-bit value built from data[0..2], a value in [0, 0xffffff].
static WEBP_INLINE int GetLE24(const uint8_t* const data) {
  return GetLE16(data) | (data[2] << 16);
}

// Returns the 32-bit value built from data[0..3].
static WEBP_INLINE uint32_t GetLE32(const uint8_t* const data) {
  // The upper half-word is widened to uint32_t *before* the shift: shifting
  // the signed result of GetLE16() left by 16 overflows 'int' (undefined
  // behavior) as soon as the top bit of data[3] is set.
  const uint32_t low_half = (uint32_t)GetLE16(data);
  const uint32_t high_half = (uint32_t)GetLE16(data + 2);
  return low_half | (high_half << 16);
}
// Store 16, 24 or 32 bits in little-endian order.
// Each helper writes exactly 2, 3 or 4 bytes starting at 'data', least
// significant byte first.

// Writes the two low bytes of 'val'; asserts that 'val' fits in 16 bits.
static WEBP_INLINE void PutLE16(uint8_t* const data, int val) {
  const int byte0 = val >> 0;
  const int byte1 = val >> 8;
  assert(val < (1 << 16));
  data[0] = (uint8_t)byte0;
  data[1] = (uint8_t)byte1;
}

// Writes the three low bytes of 'val'; asserts that 'val' fits in 24 bits.
static WEBP_INLINE void PutLE24(uint8_t* const data, int val) {
  const int low_half = val & 0xffff;
  const int byte2 = val >> 16;
  assert(val < (1 << 24));
  PutLE16(data, low_half);
  data[2] = (uint8_t)byte2;
}

// Writes all four bytes of 'val' as two consecutive 16-bit stores.
static WEBP_INLINE void PutLE32(uint8_t* const data, uint32_t val) {
  const int low_half = (int)(val & 0xffff);
  const int high_half = (int)(val >> 16);
  PutLE16(data, low_half);
  PutLE16(data + 2, high_half);
}
//------------------------------------------------------------------------------
#if defined(__cplusplus) || defined(c_plusplus)

@ -18,7 +18,19 @@
extern "C" {
#endif
#define WEBP_DECODER_ABI_VERSION 0x0200 // MAJOR(8b) + MINOR(8b)
#define WEBP_DECODER_ABI_VERSION 0x0201 // MAJOR(8b) + MINOR(8b)
typedef struct WebPRGBABuffer WebPRGBABuffer;
typedef struct WebPYUVABuffer WebPYUVABuffer;
typedef struct WebPDecBuffer WebPDecBuffer;
#if !(defined(__cplusplus) || defined(c_plusplus))
typedef enum VP8StatusCode VP8StatusCode;
typedef enum WEBP_CSP_MODE WEBP_CSP_MODE;
#endif
typedef struct WebPIDecoder WebPIDecoder;
typedef struct WebPBitstreamFeatures WebPBitstreamFeatures;
typedef struct WebPDecoderOptions WebPDecoderOptions;
typedef struct WebPDecoderConfig WebPDecoderConfig;
// Return the decoder's version number, packed in hexadecimal using 8bits for
// each of major/minor/revision. E.g: v2.5.7 is 0x020507.
@ -118,20 +130,28 @@ WEBP_EXTERN(uint8_t*) WebPDecodeYUVInto(
// Note: the naming describes the byte-ordering of packed samples in memory.
// For instance, MODE_BGRA relates to samples ordered as B,G,R,A,B,G,R,A,...
// Non-capital names (e.g.:MODE_Argb) relates to pre-multiplied RGB channels.
// RGB-565 and RGBA-4444 are also endian-agnostic and byte-oriented.
typedef enum { MODE_RGB = 0, MODE_RGBA = 1,
MODE_BGR = 2, MODE_BGRA = 3,
MODE_ARGB = 4, MODE_RGBA_4444 = 5,
MODE_RGB_565 = 6,
// RGB-premultiplied transparent modes (alpha value is preserved)
MODE_rgbA = 7,
MODE_bgrA = 8,
MODE_Argb = 9,
MODE_rgbA_4444 = 10,
// YUV modes must come after RGB ones.
MODE_YUV = 11, MODE_YUVA = 12, // yuv 4:2:0
MODE_LAST = 13
} WEBP_CSP_MODE;
// RGBA-4444 and RGB-565 colorspaces are represented by following byte-order:
// RGBA-4444: [r3 r2 r1 r0 g3 g2 g1 g0], [b3 b2 b1 b0 a3 a2 a1 a0], ...
// RGB-565: [r4 r3 r2 r1 r0 g5 g4 g3], [g2 g1 g0 b4 b3 b2 b1 b0], ...
// In the case WEBP_SWAP_16BITS_CSP is defined, the bytes are swapped for
// these two modes:
// RGBA-4444: [b3 b2 b1 b0 a3 a2 a1 a0], [r3 r2 r1 r0 g3 g2 g1 g0], ...
// RGB-565: [g2 g1 g0 b4 b3 b2 b1 b0], [r4 r3 r2 r1 r0 g5 g4 g3], ...
// Output colorspace selector (used as WebPDecBuffer::colorspace and as the
// 'csp' argument of WebPINewRGB()).
// The numeric values are spelled out explicitly and the groups are ordered
// (plain RGB modes, then premultiplied ones, then YUV); keep both unchanged
// when editing, since the YUV entries are required to follow the RGB ones.
enum WEBP_CSP_MODE {
MODE_RGB = 0, MODE_RGBA = 1,
MODE_BGR = 2, MODE_BGRA = 3,
MODE_ARGB = 4, MODE_RGBA_4444 = 5,
MODE_RGB_565 = 6,
// RGB-premultiplied transparent modes (alpha value is preserved)
MODE_rgbA = 7,
MODE_bgrA = 8,
MODE_Argb = 9,
MODE_rgbA_4444 = 10,
// YUV modes must come after RGB ones.
MODE_YUV = 11, MODE_YUVA = 12, // yuv 4:2:0
// NOTE(review): looks like a one-past-the-end marker rather than a usable
// colorspace -- confirm against the decoder's validation code.
MODE_LAST = 13
};
// Some useful macros:
static WEBP_INLINE int WebPIsPremultipliedMode(WEBP_CSP_MODE mode) {
@ -152,13 +172,13 @@ static WEBP_INLINE int WebPIsRGBMode(WEBP_CSP_MODE mode) {
//------------------------------------------------------------------------------
// WebPDecBuffer: Generic structure for describing the output sample buffer.
typedef struct { // view as RGBA
struct WebPRGBABuffer { // view as RGBA
uint8_t* rgba; // pointer to RGBA samples
int stride; // stride in bytes from one scanline to the next.
size_t size; // total size of the *rgba buffer.
} WebPRGBABuffer;
};
typedef struct { // view as YUVA
struct WebPYUVABuffer { // view as YUVA
uint8_t* y, *u, *v, *a; // pointer to luma, chroma U/V, alpha samples
int y_stride; // luma stride
int u_stride, v_stride; // chroma strides
@ -166,10 +186,10 @@ typedef struct { // view as YUVA
size_t y_size; // luma plane size
size_t u_size, v_size; // chroma planes size
size_t a_size; // alpha-plane size
} WebPYUVABuffer;
};
// Output buffer
typedef struct {
struct WebPDecBuffer {
WEBP_CSP_MODE colorspace; // Colorspace.
int width, height; // Dimensions.
int is_external_memory; // If true, 'internal_memory' pointer is not used.
@ -182,7 +202,7 @@ typedef struct {
uint8_t* private_memory; // Internally allocated memory (only when
// is_external_memory is false). Should not be used
// externally, but accessed via the buffer union.
} WebPDecBuffer;
};
// Internal, version-checked, entry point
WEBP_EXTERN(int) WebPInitDecBufferInternal(WebPDecBuffer*, int);
@ -200,7 +220,7 @@ WEBP_EXTERN(void) WebPFreeDecBuffer(WebPDecBuffer* buffer);
//------------------------------------------------------------------------------
// Enumeration of the status codes
typedef enum {
enum VP8StatusCode {
VP8_STATUS_OK = 0,
VP8_STATUS_OUT_OF_MEMORY,
VP8_STATUS_INVALID_PARAM,
@ -209,7 +229,7 @@ typedef enum {
VP8_STATUS_SUSPENDED,
VP8_STATUS_USER_ABORT,
VP8_STATUS_NOT_ENOUGH_DATA
} VP8StatusCode;
};
//------------------------------------------------------------------------------
// Incremental decoding
@ -237,8 +257,6 @@ typedef enum {
// }
// WebPIDelete(idec);
typedef struct WebPIDecoder WebPIDecoder;
// Creates a new incremental decoder with the supplied buffer parameter.
// This output_buffer can be passed NULL, in which case a default output buffer
// is used (with MODE_RGB). Otherwise, an internal reference to 'output_buffer'
@ -251,19 +269,27 @@ WEBP_EXTERN(WebPIDecoder*) WebPINewDecoder(WebPDecBuffer* output_buffer);
// will output the RGB/A samples specified by 'csp' into a preallocated
// buffer 'output_buffer'. The size of this buffer is at least
// 'output_buffer_size' and the stride (distance in bytes between two scanlines)
// is specified by 'output_stride'. Returns NULL if the allocation failed.
// is specified by 'output_stride'.
// Additionally, output_buffer can be passed NULL in which case the output
// buffer will be allocated automatically when the decoding starts. The
// colorspace 'csp' is taken into account for allocating this buffer. All other
// parameters are ignored.
// Returns NULL if the allocation failed, or if some parameters are invalid.
WEBP_EXTERN(WebPIDecoder*) WebPINewRGB(
WEBP_CSP_MODE csp,
uint8_t* output_buffer, size_t output_buffer_size, int output_stride);
// This function allocates and initializes an incremental-decoder object, which
// will output the raw luma/chroma samples into a preallocated planes. The luma
// plane is specified by its pointer 'luma', its size 'luma_size' and its stride
// 'luma_stride'. Similarly, the chroma-u plane is specified by the 'u',
// 'u_size' and 'u_stride' parameters, and the chroma-v plane by 'v'
// and 'v_size'. And same for the alpha-plane. The 'a' pointer can be pass
// NULL in case one is not interested in the transparency plane.
// Returns NULL if the allocation failed.
// will output the raw luma/chroma samples into preallocated planes if
// supplied. The luma plane is specified by its pointer 'luma', its size
// 'luma_size' and its stride 'luma_stride'. Similarly, the chroma-u plane
// is specified by the 'u', 'u_size' and 'u_stride' parameters, and the chroma-v
// plane by 'v' and 'v_size'. The same goes for the alpha-plane. The 'a' pointer
// can be passed NULL in case one is not interested in the transparency plane.
// Conversely, 'luma' can be passed NULL if no preallocated planes are supplied.
// In this case, the output buffer will be automatically allocated (using
// MODE_YUVA) when decoding starts. All parameters are then ignored.
// Returns NULL if the allocation failed or if a parameter is invalid.
WEBP_EXTERN(WebPIDecoder*) WebPINewYUVA(
uint8_t* luma, size_t luma_size, int luma_stride,
uint8_t* u, size_t u_size, int u_stride,
@ -344,7 +370,7 @@ WEBP_EXTERN(const WebPDecBuffer*) WebPIDecodedArea(
CHECK(WebPGetFeatures(data, data_size, &config.input) == VP8_STATUS_OK);
// C) Adjust 'config', if needed
config.no_fancy = 1;
config.no_fancy_upsampling = 1;
config.output.colorspace = MODE_BGRA;
// etc.
@ -365,10 +391,11 @@ WEBP_EXTERN(const WebPDecBuffer*) WebPIDecodedArea(
*/
// Features gathered from the bitstream
typedef struct {
int width; // Width in pixels, as read from the bitstream.
int height; // Height in pixels, as read from the bitstream.
int has_alpha; // True if the bitstream contains an alpha channel.
struct WebPBitstreamFeatures {
int width; // Width in pixels, as read from the bitstream.
int height; // Height in pixels, as read from the bitstream.
int has_alpha; // True if the bitstream contains an alpha channel.
int has_animation; // True if the bitstream is an animation.
// Unused for now:
int bitstream_version; // should be 0 for now. TODO(later)
@ -376,8 +403,8 @@ typedef struct {
// recommended.
int rotate; // TODO(later)
int uv_sampling; // should be 0 for now. TODO(later)
uint32_t pad[3]; // padding for later use
} WebPBitstreamFeatures;
uint32_t pad[2]; // padding for later use
};
// Internal, version-checked, entry point
WEBP_EXTERN(VP8StatusCode) WebPGetFeaturesInternal(
@ -385,8 +412,9 @@ WEBP_EXTERN(VP8StatusCode) WebPGetFeaturesInternal(
// Retrieve features from the bitstream. The *features structure is filled
// with information gathered from the bitstream.
// Returns false in case of error or version mismatch.
// In case of error, features->bitstream_status will reflect the error code.
// Returns VP8_STATUS_OK when the features are successfully retrieved. Returns
// VP8_STATUS_NOT_ENOUGH_DATA when more data is needed to retrieve the
// features from headers. Returns error in other cases.
static WEBP_INLINE VP8StatusCode WebPGetFeatures(
const uint8_t* data, size_t data_size,
WebPBitstreamFeatures* features) {
@ -395,7 +423,7 @@ static WEBP_INLINE VP8StatusCode WebPGetFeatures(
}
// Decoding options
typedef struct {
struct WebPDecoderOptions {
int bypass_filtering; // if true, skip the in-loop filtering
int no_fancy_upsampling; // if true, use faster pointwise upsampler
int use_cropping; // if true, cropping is applied _first_
@ -410,14 +438,14 @@ typedef struct {
int force_rotation; // forced rotation (to be applied _last_)
int no_enhancement; // if true, discard enhancement layer
uint32_t pad[6]; // padding for later use
} WebPDecoderOptions;
};
// Main object storing the configuration for advanced decoding.
typedef struct {
struct WebPDecoderConfig {
WebPBitstreamFeatures input; // Immutable bitstream features (optional)
WebPDecBuffer output; // Output buffer (can point to external mem)
WebPDecoderOptions options; // Decoding options
} WebPDecoderConfig;
};
// Internal, version-checked, entry point
WEBP_EXTERN(int) WebPInitDecoderConfigInternal(WebPDecoderConfig*, int);

@ -0,0 +1,212 @@
// Copyright 2012 Google Inc. All Rights Reserved.
//
// This code is licensed under the same terms as WebM:
// Software License Agreement: http://www.webmproject.org/license/software/
// Additional IP Rights Grant: http://www.webmproject.org/license/additional/
// -----------------------------------------------------------------------------
//
// Demux API.
// Enables extraction of image and extended format data from WebP files.
// Code Example: Demuxing WebP data to extract all the frames, ICC profile
// and EXIF/XMP metadata.
//
// WebPDemuxer* demux = WebPDemux(&webp_data);
//
// uint32_t width = WebPDemuxGetI(demux, WEBP_FF_CANVAS_WIDTH);
// uint32_t height = WebPDemuxGetI(demux, WEBP_FF_CANVAS_HEIGHT);
// // ... (Get information about the features present in the WebP file).
// uint32_t flags = WebPDemuxGetI(demux, WEBP_FF_FORMAT_FLAGS);
//
// // ... (Iterate over all frames).
// WebPIterator iter;
// if (WebPDemuxGetFrame(demux, 1, &iter)) {
// do {
// // ... (Consume 'iter'; e.g. Decode 'iter.fragment' with WebPDecode(),
// // ... and get other frame properties like width, height, offsets etc.
// // ... see 'struct WebPIterator' below for more info).
// } while (WebPDemuxNextFrame(&iter));
// WebPDemuxReleaseIterator(&iter);
// }
//
// // ... (Extract metadata).
// WebPChunkIterator chunk_iter;
// if (flags & ICCP_FLAG) WebPDemuxGetChunk(demux, "ICCP", 1, &chunk_iter);
// // ... (Consume the ICC profile in 'chunk_iter.chunk').
// WebPDemuxReleaseChunkIterator(&chunk_iter);
// if (flags & EXIF_FLAG) WebPDemuxGetChunk(demux, "EXIF", 1, &chunk_iter);
// // ... (Consume the EXIF metadata in 'chunk_iter.chunk').
// WebPDemuxReleaseChunkIterator(&chunk_iter);
// if (flags & XMP_FLAG) WebPDemuxGetChunk(demux, "XMP ", 1, &chunk_iter);
// // ... (Consume the XMP metadata in 'chunk_iter.chunk').
// WebPDemuxReleaseChunkIterator(&chunk_iter);
// WebPDemuxDelete(demux);
#ifndef WEBP_WEBP_DEMUX_H_
#define WEBP_WEBP_DEMUX_H_
#include "./mux_types.h"
#if defined(__cplusplus) || defined(c_plusplus)
extern "C" {
#endif
#define WEBP_DEMUX_ABI_VERSION 0x0100 // MAJOR(8b) + MINOR(8b)
typedef struct WebPDemuxer WebPDemuxer;
#if !(defined(__cplusplus) || defined(c_plusplus))
typedef enum WebPDemuxState WebPDemuxState;
typedef enum WebPFormatFeature WebPFormatFeature;
#endif
typedef struct WebPIterator WebPIterator;
typedef struct WebPChunkIterator WebPChunkIterator;
//------------------------------------------------------------------------------
// Returns the version number of the demux library, packed in hexadecimal using
// 8bits for each of major/minor/revision. E.g: v2.5.7 is 0x020507.
WEBP_EXTERN(int) WebPGetDemuxVersion(void);
//------------------------------------------------------------------------------
// Life of a Demux object
// Parsing progress of a demuxer, as reported through the 'state' argument of
// WebPDemuxPartial().
// The enumerators use the implicit values 0, 1, 2 and are listed in order of
// increasing progress: callers compare them with relational operators (e.g.
// "state > WEBP_DEMUX_PARSING_HEADER"), so the order must not be changed.
enum WebPDemuxState {
WEBP_DEMUX_PARSING_HEADER, // Not enough data to parse full header.
WEBP_DEMUX_PARSED_HEADER, // Header parsing complete, data may be available.
WEBP_DEMUX_DONE // Entire file has been parsed.
};
// Internal, version-checked, entry point
WEBP_EXTERN(WebPDemuxer*) WebPDemuxInternal(
const WebPData*, int, WebPDemuxState*, int);
// Parses 'data', which must hold a complete WebP file.
// Thin wrapper around the version-checked WebPDemuxInternal() entry point: it
// forwards this header's WEBP_DEMUX_ABI_VERSION and requests no state report.
// Returns a WebPDemuxer object on successful parse, NULL otherwise.
static WEBP_INLINE WebPDemuxer* WebPDemux(const WebPData* data) {
  const int allow_partial = 0;            // the whole file is expected
  WebPDemuxState* const no_state = NULL;  // no state report requested
  return WebPDemuxInternal(data, allow_partial, no_state,
                           WEBP_DEMUX_ABI_VERSION);
}
// Parses 'data', which may hold only the beginning of a WebP file.
// Thin wrapper around the version-checked WebPDemuxInternal() entry point: it
// forwards this header's WEBP_DEMUX_ABI_VERSION along with 'state'.
// If 'state' is non-NULL it will be set to indicate the status of the demuxer.
// Returns a WebPDemuxer object on successful parse, NULL otherwise.
static WEBP_INLINE WebPDemuxer* WebPDemuxPartial(
    const WebPData* data, WebPDemuxState* state) {
  const int allow_partial = 1;  // truncated input is acceptable
  return WebPDemuxInternal(data, allow_partial, state, WEBP_DEMUX_ABI_VERSION);
}
// Frees memory associated with 'dmux'.
WEBP_EXTERN(void) WebPDemuxDelete(WebPDemuxer* dmux);
//------------------------------------------------------------------------------
// Data/information extraction.
// Selects which value WebPDemuxGetI() returns for a demuxer.
// Every feature is reported as a uint32_t (the return type of WebPDemuxGetI()).
enum WebPFormatFeature {
WEBP_FF_FORMAT_FLAGS, // Extended format flags present in the 'VP8X' chunk.
WEBP_FF_CANVAS_WIDTH, // Canvas width.
WEBP_FF_CANVAS_HEIGHT, // Canvas height.
// NOTE(review): loop count and background color presumably describe the
// animation as a whole -- confirm against the container specification.
WEBP_FF_LOOP_COUNT,
WEBP_FF_BACKGROUND_COLOR,
WEBP_FF_FRAME_COUNT // Number of frames present in the demux object.
// In case of a partial demux, this is the number of
// frames seen so far, with the last frame possibly
// being partial.
};
// Get the 'feature' value from the 'dmux'.
// NOTE: values are only valid if WebPDemux() was used or WebPDemuxPartial()
// returned a state > WEBP_DEMUX_PARSING_HEADER.
WEBP_EXTERN(uint32_t) WebPDemuxGetI(
const WebPDemuxer* dmux, WebPFormatFeature feature);
//------------------------------------------------------------------------------
// Frame iteration.
// Cursor over the frames (and the fragments of a frame) of a demuxed file.
// It is filled in by WebPDemuxGetFrame(), moved with WebPDemuxNextFrame(),
// WebPDemuxPrevFrame() and WebPDemuxSelectFragment(), and must be handed to
// WebPDemuxReleaseIterator() once it is no longer needed.
struct WebPIterator {
// Number of the current frame. The usage example at the top of this header
// fetches the first frame with frame number 1, so numbering presumably
// starts at 1 -- TODO confirm.
int frame_num;
int num_frames; // equivalent to WEBP_FF_FRAME_COUNT.
// Number of the fragment currently exposed in 'fragment', and the count of
// fragments available (presumably for the current frame -- TODO confirm).
int fragment_num;
int num_fragments;
int x_offset, y_offset; // offset relative to the canvas.
int width, height; // dimensions of this frame or fragment.
int duration; // display duration in milliseconds.
WebPMuxAnimDispose dispose_method; // dispose method for the frame.
int complete; // true if 'fragment' contains a full frame. partial images
// may still be decoded with the WebP incremental decoder.
WebPData fragment; // The frame or fragment given by 'frame_num' and
// 'fragment_num'.
uint32_t pad[4]; // padding for later use.
void* private_; // for internal use only.
};
// Retrieves frame 'frame_number' from 'dmux'.
// 'iter->fragment' points to the first fragment on return from this function.
// Individual fragments may be extracted using WebPDemuxSetFragment().
// Setting 'frame_number' equal to 0 will return the last frame of the image.
// Returns false if 'dmux' is NULL or frame 'frame_number' is not present.
// Call WebPDemuxReleaseIterator() when use of the iterator is complete.
// NOTE: 'dmux' must persist for the lifetime of 'iter'.
WEBP_EXTERN(int) WebPDemuxGetFrame(
const WebPDemuxer* dmux, int frame_number, WebPIterator* iter);
// Sets 'iter->fragment' to point to the next ('iter->frame_num' + 1) or
// previous ('iter->frame_num' - 1) frame. These functions do not loop.
// Returns true on success, false otherwise.
WEBP_EXTERN(int) WebPDemuxNextFrame(WebPIterator* iter);
WEBP_EXTERN(int) WebPDemuxPrevFrame(WebPIterator* iter);
// Sets 'iter->fragment' to reflect fragment number 'fragment_num'.
// Returns true if fragment 'fragment_num' is present, false otherwise.
WEBP_EXTERN(int) WebPDemuxSelectFragment(WebPIterator* iter, int fragment_num);
// Releases any memory associated with 'iter'.
// Must be called before any subsequent calls to WebPDemuxGetChunk() on the same
// iter. Also, must be called before destroying the associated WebPDemuxer with
// WebPDemuxDelete().
WEBP_EXTERN(void) WebPDemuxReleaseIterator(WebPIterator* iter);
//------------------------------------------------------------------------------
// Chunk iteration.
// Cursor over the chunks that share one fourcc.
// It is filled in by WebPDemuxGetChunk(), moved with WebPDemuxNextChunk() and
// WebPDemuxPrevChunk(), and must be handed to WebPDemuxReleaseChunkIterator()
// once it is no longer needed.
struct WebPChunkIterator {
// The current and total number of chunks with the fourcc given to
// WebPDemuxGetChunk().
int chunk_num;
int num_chunks;
WebPData chunk; // The payload of the chunk.
uint32_t pad[6]; // padding for later use
// NOTE(review): presumably for internal use only, like the 'private_' member
// of WebPIterator -- confirm.
void* private_;
};
// Retrieves the 'chunk_number' instance of the chunk with id 'fourcc' from
// 'dmux'.
// 'fourcc' is a character array containing the fourcc of the chunk to return,
// e.g., "ICCP", "XMP ", "EXIF", etc.
// Setting 'chunk_number' equal to 0 will return the last chunk in a set.
// Returns true if the chunk is found, false otherwise. Image related chunk
// payloads are accessed through WebPDemuxGetFrame() and related functions.
// Call WebPDemuxReleaseChunkIterator() when use of the iterator is complete.
// NOTE: 'dmux' must persist for the lifetime of the iterator.
WEBP_EXTERN(int) WebPDemuxGetChunk(const WebPDemuxer* dmux,
const char fourcc[4], int chunk_number,
WebPChunkIterator* iter);
// Sets 'iter->chunk' to point to the next ('iter->chunk_num' + 1) or previous
// ('iter->chunk_num' - 1) chunk. These functions do not loop.
// Returns true on success, false otherwise.
WEBP_EXTERN(int) WebPDemuxNextChunk(WebPChunkIterator* iter);
WEBP_EXTERN(int) WebPDemuxPrevChunk(WebPChunkIterator* iter);
// Releases any memory associated with 'iter'.
// Must be called before destroying the associated WebPDemuxer with
// WebPDemuxDelete().
WEBP_EXTERN(void) WebPDemuxReleaseChunkIterator(WebPChunkIterator* iter);
//------------------------------------------------------------------------------
#if defined(__cplusplus) || defined(c_plusplus)
} // extern "C"
#endif
#endif /* WEBP_WEBP_DEMUX_H_ */

@ -18,7 +18,18 @@
extern "C" {
#endif
#define WEBP_ENCODER_ABI_VERSION 0x0200 // MAJOR(8b) + MINOR(8b)
#define WEBP_ENCODER_ABI_VERSION 0x0201 // MAJOR(8b) + MINOR(8b)
#if !(defined(__cplusplus) || defined(c_plusplus))
typedef enum WebPImageHint WebPImageHint;
typedef enum WebPEncCSP WebPEncCSP;
typedef enum WebPPreset WebPPreset;
typedef enum WebPEncodingError WebPEncodingError;
#endif
typedef struct WebPConfig WebPConfig;
typedef struct WebPPicture WebPPicture; // main structure for I/O
typedef struct WebPAuxStats WebPAuxStats;
typedef struct WebPMemoryWriter WebPMemoryWriter;
// Return the encoder's version number, packed in hexadecimal using 8bits for
// each of major/minor/revision. E.g: v2.5.7 is 0x020507.
@ -66,15 +77,16 @@ WEBP_EXTERN(size_t) WebPEncodeLosslessBGRA(const uint8_t* bgra,
// Coding parameters
// Image characteristics hint for the underlying encoder.
typedef enum {
enum WebPImageHint {
WEBP_HINT_DEFAULT = 0, // default preset.
WEBP_HINT_PICTURE, // digital picture, like portrait, inner shot
WEBP_HINT_PHOTO, // outdoor photograph, with natural lighting
WEBP_HINT_GRAPH, // Discrete tone image (graph, map-tile etc).
WEBP_HINT_LAST
} WebPImageHint;
};
typedef struct {
// Compression parameters.
struct WebPConfig {
int lossless; // Lossless encoding (0=lossy(default), 1=lossless).
float quality; // between 0 (smallest file) and 100 (biggest)
int method; // quality/speed trade-off (0=fast, 6=slower-better)
@ -109,20 +121,26 @@ typedef struct {
int partition_limit; // quality degradation allowed to fit the 512k limit
// on prediction modes coding (0: no degradation,
// 100: maximum possible degradation).
uint32_t pad[8]; // padding for later use
} WebPConfig;
int emulate_jpeg_size; // If true, compression parameters will be remapped
// to better match the expected output size from
// JPEG compression. Generally, the output size will
// be similar but the degradation will be lower.
int thread_level; // If non-zero, try and use multi-threaded encoding.
int low_memory; // If set, reduce memory usage (but increase CPU use).
uint32_t pad[5]; // padding for later use
};
// Enumerate some predefined settings for WebPConfig, depending on the type
// of source picture. These presets are used when calling WebPConfigPreset().
typedef enum {
enum WebPPreset {
WEBP_PRESET_DEFAULT = 0, // default preset.
WEBP_PRESET_PICTURE, // digital picture, like portrait, inner shot
WEBP_PRESET_PHOTO, // outdoor photograph, with natural lighting
WEBP_PRESET_DRAWING, // hand or line drawing, with high-contrast details
WEBP_PRESET_ICON, // small-sized colorful images
WEBP_PRESET_TEXT // text-like
} WebPPreset;
};
// Internal, version-checked, entry point
WEBP_EXTERN(int) WebPConfigInitInternal(WebPConfig*, WebPPreset, float, int);
@ -152,11 +170,9 @@ WEBP_EXTERN(int) WebPValidateConfig(const WebPConfig* config);
//------------------------------------------------------------------------------
// Input / Output
typedef struct WebPPicture WebPPicture; // main structure for I/O
// Structure for storing auxiliary statistics (mostly for lossy encoding).
typedef struct {
struct WebPAuxStats {
int coded_size; // final size
float PSNR[5]; // peak-signal-to-noise ratio for Y/U/V/All/Alpha
@ -182,7 +198,7 @@ typedef struct {
int lossless_size; // final lossless size
uint32_t pad[4]; // padding for later use
} WebPAuxStats;
};
// Signature for output function. Should return true if writing was successful.
// data/data_size is the segment of data to write, and 'picture' is for
@ -192,18 +208,19 @@ typedef int (*WebPWriterFunction)(const uint8_t* data, size_t data_size,
// WebPMemoryWrite: a special WebPWriterFunction that writes to memory using
// the following WebPMemoryWriter object (to be set as a custom_ptr).
typedef struct {
struct WebPMemoryWriter {
uint8_t* mem; // final buffer (of size 'max_size', larger than 'size').
size_t size; // final size
size_t max_size; // total capacity
uint32_t pad[1]; // padding for later use
} WebPMemoryWriter;
};
// The following must be called first before any use.
WEBP_EXTERN(void) WebPMemoryWriterInit(WebPMemoryWriter* writer);
// The custom writer to be used with WebPMemoryWriter as custom_ptr. Upon
// completion, writer.mem and writer.size will hold the coded data.
// writer.mem must be freed using the call 'free(writer.mem)'.
WEBP_EXTERN(int) WebPMemoryWrite(const uint8_t* data, size_t data_size,
const WebPPicture* picture);
@ -212,7 +229,8 @@ WEBP_EXTERN(int) WebPMemoryWrite(const uint8_t* data, size_t data_size,
// everything is OK.
typedef int (*WebPProgressHook)(int percent, const WebPPicture* picture);
typedef enum {
// Color spaces.
enum WebPEncCSP {
// chroma sampling
WEBP_YUV420 = 0, // 4:2:0
WEBP_YUV422 = 1, // 4:2:2
@ -225,10 +243,10 @@ typedef enum {
WEBP_YUV444A = 6,
WEBP_YUV400A = 7, // grayscale + alpha
WEBP_CSP_ALPHA_BIT = 4 // bit that is set if alpha is present
} WebPEncCSP;
};
// Encoding error conditions.
typedef enum {
enum WebPEncodingError {
VP8_ENC_OK = 0,
VP8_ENC_ERROR_OUT_OF_MEMORY, // memory error allocating objects
VP8_ENC_ERROR_BITSTREAM_OUT_OF_MEMORY, // memory error while flushing bits
@ -241,14 +259,13 @@ typedef enum {
VP8_ENC_ERROR_FILE_TOO_BIG, // file is bigger than 4G
VP8_ENC_ERROR_USER_ABORT, // abort request by user
VP8_ENC_ERROR_LAST // list terminator. always last.
} WebPEncodingError;
};
// maximum width/height allowed (inclusive), in pixels
#define WEBP_MAX_DIMENSION 16383
// Main exchange structure (input samples, output bytes, statistics)
struct WebPPicture {
// INPUT
//////////////
// Main flag for encoder selecting between ARGB or YUV input.
@ -348,13 +365,13 @@ WEBP_EXTERN(void) WebPPictureFree(WebPPicture* picture);
// Returns false in case of memory allocation error.
WEBP_EXTERN(int) WebPPictureCopy(const WebPPicture* src, WebPPicture* dst);
// Compute PSNR or SSIM distortion between two pictures.
// Compute PSNR, SSIM or LSIM distortion metric between two pictures.
// Result is in dB, stored in result[] in the Y/U/V/Alpha/All order.
// Returns false in case of error (pic1 and pic2 don't have same dimension, ...)
// Returns false in case of error (src and ref don't have same dimension, ...)
// Warning: this function is rather CPU-intensive.
WEBP_EXTERN(int) WebPPictureDistortion(
const WebPPicture* pic1, const WebPPicture* pic2,
int metric_type, // 0 = PSNR, 1 = SSIM
const WebPPicture* src, const WebPPicture* ref,
int metric_type, // 0 = PSNR, 1 = SSIM, 2 = LSIM
float result[5]);
// self-crops a picture to the rectangle defined by top/left/width/height.

@ -12,6 +12,9 @@
#ifndef WEBP_WEBP_FORMAT_CONSTANTS_H_
#define WEBP_WEBP_FORMAT_CONSTANTS_H_
// Create fourcc of the chunk from the chunk tag characters.
// 'a' ends up in the least-significant byte and 'd' in the most-significant
// one, i.e. the tag bytes are packed in little-endian order.
// Every operand is converted to uint32_t before it is shifted: shifting a
// plain 'int' byte value >= 0x80 left by 24 would overflow (undefined
// behavior in C).
#define MKFOURCC(a, b, c, d)                      \
  ((uint32_t)(a) | (uint32_t)(b) << 8 |           \
   (uint32_t)(c) << 16 | (uint32_t)(d) << 24)
// VP8 related constants.
#define VP8_SIGNATURE 0x9d012a // Signature in VP8 data.
#define VP8_MAX_PARTITION0_SIZE (1 << 19) // max size of mode partition
@ -65,23 +68,16 @@ typedef enum {
#define CHUNK_SIZE_BYTES 4 // Size needed to store chunk's size.
#define CHUNK_HEADER_SIZE 8 // Size of a chunk header.
#define RIFF_HEADER_SIZE 12 // Size of the RIFF header ("RIFFnnnnWEBP").
#define FRAME_CHUNK_SIZE 15 // Size of a FRM chunk.
#define LOOP_CHUNK_SIZE 2 // Size of a LOOP chunk.
#define TILE_CHUNK_SIZE 6 // Size of a TILE chunk.
#define ANMF_CHUNK_SIZE 16 // Size of an ANMF chunk.
#define ANIM_CHUNK_SIZE 6 // Size of an ANIM chunk.
#define FRGM_CHUNK_SIZE 6 // Size of a FRGM chunk.
#define VP8X_CHUNK_SIZE 10 // Size of a VP8X chunk.
#define TILING_FLAG_BIT 0x01 // Set if tiles are possibly used.
#define ANIMATION_FLAG_BIT 0x02 // Set if some animation is expected
#define ICC_FLAG_BIT 0x04 // Whether ICC is present or not.
#define METADATA_FLAG_BIT 0x08 // Set if some META chunk is possibly present.
#define ALPHA_FLAG_BIT 0x10 // Should be same as the ALPHA_FLAG in mux.h
#define ROTATION_FLAG_BITS 0xe0 // all 3 bits for rotation + symmetry
#define MAX_CANVAS_SIZE (1 << 24) // 24-bit max for VP8X width/height.
#define MAX_IMAGE_AREA (1ULL << 32) // 32-bit max for width x height.
#define MAX_LOOP_COUNT (1 << 16) // maximum value for loop-count
#define MAX_DURATION (1 << 24) // maximum duration
#define MAX_POSITION_OFFSET (1 << 24) // maximum frame/tile x/y offset
#define MAX_CANVAS_SIZE (1 << 24) // 24-bit max for VP8X width/height.
#define MAX_IMAGE_AREA (1ULL << 32) // 32-bit max for width x height.
#define MAX_LOOP_COUNT (1 << 16) // maximum value for loop-count
#define MAX_DURATION (1 << 24) // maximum duration
#define MAX_POSITION_OFFSET (1 << 24) // maximum frame/fragment x/y offset
// Maximum chunk payload is such that adding the header and padding won't
// overflow a uint32_t.

@ -11,7 +11,7 @@
// Vikas (vikasa@google.com)
// This API allows manipulation of WebP container images containing features
// like Color profile, XMP metadata, Animation and Tiling.
// like color profile, metadata, animation and fragmented images.
//
// Code Example#1: Creating a MUX with image data, color profile and XMP
// metadata.
@ -21,13 +21,13 @@
// // ... (Prepare image data).
// WebPMuxSetImage(mux, &image, copy_data);
// // ... (Prepare ICCP color profile data).
// WebPMuxSetColorProfile(mux, &icc_profile, copy_data);
// WebPMuxSetChunk(mux, "ICCP", &icc_profile, copy_data);
// // ... (Prepare XMP metadata).
// WebPMuxSetMetadata(mux, &xmp, copy_data);
// WebPMuxSetChunk(mux, "XMP ", &xmp, copy_data);
// // Get data from mux in WebP RIFF format.
// WebPMuxAssemble(mux, &output_data);
// WebPMuxDelete(mux);
// // ... (Consume output_data; e.g. write output_data.bytes_ to file).
// // ... (Consume output_data; e.g. write output_data.bytes to file).
// WebPDataClear(&output_data);
//
// Code Example#2: Get image and color profile data from a WebP file.
@ -35,9 +35,9 @@
// int copy_data = 0;
// // ... (Read data from file).
// WebPMux* mux = WebPMuxCreate(&data, copy_data);
// WebPMuxGetImage(mux, &image);
// WebPMuxGetFrame(mux, 1, &image);
// // ... (Consume image; e.g. call WebPDecode() to decode the data).
// WebPMuxGetColorProfile(mux, &icc_profile);
// WebPMuxGetChunk(mux, "ICCP", &icc_profile);
// // ... (Consume icc_data).
// WebPMuxDelete(mux);
// free(data);
@ -45,7 +45,7 @@
#ifndef WEBP_WEBP_MUX_H_
#define WEBP_WEBP_MUX_H_
#include "./types.h"
#include "./mux_types.h"
#if defined(__cplusplus) || defined(c_plusplus)
extern "C" {
@ -53,61 +53,44 @@ extern "C" {
#define WEBP_MUX_ABI_VERSION 0x0100 // MAJOR(8b) + MINOR(8b)
typedef struct WebPMux WebPMux; // main opaque object.
#if !(defined(__cplusplus) || defined(c_plusplus))
typedef enum WebPMuxError WebPMuxError;
typedef enum WebPChunkId WebPChunkId;
#endif
typedef struct WebPMuxFrameInfo WebPMuxFrameInfo;
typedef struct WebPMuxAnimParams WebPMuxAnimParams;
// Error codes
typedef enum {
enum WebPMuxError {
WEBP_MUX_OK = 1,
WEBP_MUX_NOT_FOUND = 0,
WEBP_MUX_INVALID_ARGUMENT = -1,
WEBP_MUX_BAD_DATA = -2,
WEBP_MUX_MEMORY_ERROR = -3,
WEBP_MUX_NOT_ENOUGH_DATA = -4
} WebPMuxError;
// Flag values for different features used in VP8X chunk.
typedef enum {
TILE_FLAG = 0x00000001,
ANIMATION_FLAG = 0x00000002,
ICCP_FLAG = 0x00000004,
META_FLAG = 0x00000008,
ALPHA_FLAG = 0x00000010
} WebPFeatureFlags;
};
// IDs for different types of chunks.
typedef enum {
enum WebPChunkId {
WEBP_CHUNK_VP8X, // VP8X
WEBP_CHUNK_ICCP, // ICCP
WEBP_CHUNK_LOOP, // LOOP
WEBP_CHUNK_FRAME, // FRM
WEBP_CHUNK_TILE, // TILE
WEBP_CHUNK_ANIM, // ANIM
WEBP_CHUNK_ANMF, // ANMF
WEBP_CHUNK_FRGM, // FRGM
WEBP_CHUNK_ALPHA, // ALPH
WEBP_CHUNK_IMAGE, // VP8/VP8L
WEBP_CHUNK_META, // META
WEBP_CHUNK_EXIF, // EXIF
WEBP_CHUNK_XMP, // XMP
WEBP_CHUNK_UNKNOWN, // Other chunks.
WEBP_CHUNK_NIL
} WebPChunkId;
typedef struct WebPMux WebPMux; // main opaque object.
// Data type used to describe 'raw' data, e.g., chunk data
// (ICC profile, metadata) and WebP compressed image data.
typedef struct {
const uint8_t* bytes_;
size_t size_;
} WebPData;
};
//------------------------------------------------------------------------------
// Manipulation of a WebPData object.
// Initializes the contents of the 'webp_data' object with default values.
WEBP_EXTERN(void) WebPDataInit(WebPData* webp_data);
// Clears the contents of the 'webp_data' object by calling free(). Does not
// deallocate the object itself.
WEBP_EXTERN(void) WebPDataClear(WebPData* webp_data);
// Allocates necessary storage for 'dst' and copies the contents of 'src'.
// Returns true on success.
WEBP_EXTERN(int) WebPDataCopy(const WebPData* src, WebPData* dst);
// Returns the version number of the mux library, packed in hexadecimal using
// 8 bits for each of major/minor/revision. E.g: v2.5.7 is 0x020507.
WEBP_EXTERN(int) WebPGetMuxVersion(void);
//------------------------------------------------------------------------------
// Life of a Mux object
@ -136,8 +119,8 @@ WEBP_EXTERN(WebPMux*) WebPMuxCreateInternal(const WebPData*, int, int);
// Creates a mux object from raw data given in WebP RIFF format.
// Parameters:
// bitstream - (in) the bitstream data in WebP RIFF format
// copy_data - (in) value 1 indicates given data WILL copied to the mux, and
// value 0 indicates data will NOT be copied.
// copy_data - (in) value 1 indicates given data WILL be copied to the mux
// and value 0 indicates data will NOT be copied.
// Returns:
// A pointer to the mux object created from given data - on success.
// NULL - In case of invalid data or memory error.
@ -147,270 +130,175 @@ static WEBP_INLINE WebPMux* WebPMuxCreate(const WebPData* bitstream,
}
//------------------------------------------------------------------------------
// Single Image.
// Non-image chunks.
// Sets the image in the mux object. Any existing images (including frame/tile)
// will be removed.
// Parameters:
// mux - (in/out) object in which the image is to be set
// bitstream - (in) can either be a raw VP8/VP8L bitstream or a single-image
// WebP file (non-animated and non-tiled)
// copy_data - (in) value 1 indicates given data WILL copied to the mux, and
// value 0 indicates data will NOT be copied.
// Returns:
// WEBP_MUX_INVALID_ARGUMENT - if mux is NULL or bitstream is NULL.
// WEBP_MUX_MEMORY_ERROR - on memory allocation error.
// WEBP_MUX_OK - on success.
WEBP_EXTERN(WebPMuxError) WebPMuxSetImage(WebPMux* mux,
const WebPData* bitstream,
int copy_data);
// Gets image data from the mux object.
// The content of 'bitstream' is allocated using malloc(), and NOT
// owned by the 'mux' object. It MUST be deallocated by the caller by calling
// WebPDataClear().
// Parameters:
// mux - (in) object from which the image is to be fetched
// bitstream - (out) the image data
// Returns:
// WEBP_MUX_INVALID_ARGUMENT - if either mux or bitstream is NULL
// OR mux contains animation/tiling.
// WEBP_MUX_NOT_FOUND - if image is not present in mux object.
// WEBP_MUX_OK - on success.
WEBP_EXTERN(WebPMuxError) WebPMuxGetImage(const WebPMux* mux,
WebPData* bitstream);
// Note: Only non-image related chunks should be managed through chunk APIs.
// (Image related chunks are: "ANMF", "FRGM", "VP8 ", "VP8L" and "ALPH").
// To add, get and delete images, use APIs WebPMuxSetImage(),
// WebPMuxPushFrame(), WebPMuxGetFrame() and WebPMuxDeleteFrame().
// Deletes the image in the mux object.
// Adds a chunk with id 'fourcc' and data 'chunk_data' in the mux object.
// Any existing chunk(s) with the same id will be removed.
// Parameters:
// mux - (in/out) object from which the image is to be deleted
// mux - (in/out) object to which the chunk is to be added
// fourcc - (in) a character array containing the fourcc of the given chunk;
// e.g., "ICCP", "XMP ", "EXIF" etc.
// chunk_data - (in) the chunk data to be added
// copy_data - (in) value 1 indicates given data WILL be copied to the mux
// and value 0 indicates data will NOT be copied.
// Returns:
// WEBP_MUX_INVALID_ARGUMENT - if mux is NULL
// OR if mux contains animation/tiling.
// WEBP_MUX_NOT_FOUND - if image is not present in mux object.
// WEBP_MUX_OK - on success.
WEBP_EXTERN(WebPMuxError) WebPMuxDeleteImage(WebPMux* mux);
//------------------------------------------------------------------------------
// XMP Metadata.
// Sets the XMP metadata in the mux object. Any existing metadata chunk(s) will
// be removed.
// Parameters:
// mux - (in/out) object to which the XMP metadata is to be added
// metadata - (in) the XMP metadata data to be added
// copy_data - (in) value 1 indicates given data WILL copied to the mux, and
// value 0 indicates data will NOT be copied.
// Returns:
// WEBP_MUX_INVALID_ARGUMENT - if mux or metadata is NULL.
// WEBP_MUX_INVALID_ARGUMENT - if mux, fourcc or chunk_data is NULL
// or if fourcc corresponds to an image chunk.
// WEBP_MUX_MEMORY_ERROR - on memory allocation error.
// WEBP_MUX_OK - on success.
WEBP_EXTERN(WebPMuxError) WebPMuxSetMetadata(WebPMux* mux,
const WebPData* metadata,
int copy_data);
WEBP_EXTERN(WebPMuxError) WebPMuxSetChunk(
WebPMux* mux, const char fourcc[4], const WebPData* chunk_data,
int copy_data);
// Gets a reference to the XMP metadata in the mux object.
// Gets a reference to the data of the chunk with id 'fourcc' in the mux object.
// The caller should NOT free the returned data.
// Parameters:
// mux - (in) object from which the XMP metadata is to be fetched
// metadata - (out) XMP metadata
// mux - (in) object from which the chunk data is to be fetched
// fourcc - (in) a character array containing the fourcc of the chunk;
// e.g., "ICCP", "XMP ", "EXIF" etc.
// chunk_data - (out) returned chunk data
// Returns:
// WEBP_MUX_INVALID_ARGUMENT - if either mux or metadata is NULL.
// WEBP_MUX_NOT_FOUND - if metadata is not present in mux object.
// WEBP_MUX_INVALID_ARGUMENT - if either mux, fourcc or chunk_data is NULL
// or if fourcc corresponds to an image chunk.
// WEBP_MUX_NOT_FOUND - If mux does not contain a chunk with the given id.
// WEBP_MUX_OK - on success.
WEBP_EXTERN(WebPMuxError) WebPMuxGetMetadata(const WebPMux* mux,
WebPData* metadata);
WEBP_EXTERN(WebPMuxError) WebPMuxGetChunk(
const WebPMux* mux, const char fourcc[4], WebPData* chunk_data);
// Deletes the XMP metadata in the mux object.
// Deletes the chunk with the given 'fourcc' from the mux object.
// Parameters:
// mux - (in/out) object from which XMP metadata is to be deleted
// mux - (in/out) object from which the chunk is to be deleted
// fourcc - (in) a character array containing the fourcc of the chunk;
// e.g., "ICCP", "XMP ", "EXIF" etc.
// Returns:
// WEBP_MUX_INVALID_ARGUMENT - if mux is NULL
// WEBP_MUX_NOT_FOUND - If mux does not contain metadata.
// WEBP_MUX_INVALID_ARGUMENT - if mux or fourcc is NULL
// or if fourcc corresponds to an image chunk.
// WEBP_MUX_NOT_FOUND - If mux does not contain a chunk with the given fourcc.
// WEBP_MUX_OK - on success.
WEBP_EXTERN(WebPMuxError) WebPMuxDeleteMetadata(WebPMux* mux);
WEBP_EXTERN(WebPMuxError) WebPMuxDeleteChunk(
WebPMux* mux, const char fourcc[4]);
//------------------------------------------------------------------------------
// ICC Color Profile.
// Sets the color profile in the mux object. Any existing color profile chunk(s)
// will be removed.
// Parameters:
// mux - (in/out) object to which the color profile is to be added
// color_profile - (in) the color profile data to be added
// copy_data - (in) value 1 indicates given data WILL copied to the mux, and
// value 0 indicates data will NOT be copied.
// Returns:
// WEBP_MUX_INVALID_ARGUMENT - if mux or color_profile is NULL
// WEBP_MUX_MEMORY_ERROR - on memory allocation error
// WEBP_MUX_OK - on success
WEBP_EXTERN(WebPMuxError) WebPMuxSetColorProfile(WebPMux* mux,
const WebPData* color_profile,
int copy_data);
// Gets a reference to the color profile in the mux object.
// The caller should NOT free the returned data.
// Parameters:
// mux - (in) object from which the color profile data is to be fetched
// color_profile - (out) color profile data
// Returns:
// WEBP_MUX_INVALID_ARGUMENT - if either mux or color_profile is NULL.
// WEBP_MUX_NOT_FOUND - if color profile is not present in mux object.
// WEBP_MUX_OK - on success.
WEBP_EXTERN(WebPMuxError) WebPMuxGetColorProfile(const WebPMux* mux,
WebPData* color_profile);
// Deletes the color profile in the mux object.
// Images.
// Encapsulates data about a single frame/fragment.
// Used as the input of WebPMuxPushFrame() and as the output of
// WebPMuxGetFrame(). When filled in by WebPMuxGetFrame(), the content of
// 'bitstream' is allocated with malloc() and is NOT owned by the mux object:
// the caller must release it with WebPDataClear().
struct WebPMuxFrameInfo {
WebPData bitstream; // image data: can either be a raw VP8/VP8L bitstream
// or a single-image WebP file.
int x_offset; // x-offset of the frame. When pushing a frame, an odd
// offset is snapped to an even one (offset &= ~1).
int y_offset; // y-offset of the frame (same even-offset rule as x_offset).
int duration; // duration of the frame (in milliseconds).
WebPChunkId id; // frame type: should be one of WEBP_CHUNK_ANMF,
// WEBP_CHUNK_FRGM or WEBP_CHUNK_IMAGE. Note that
// WebPMuxPushFrame() only accepts WEBP_CHUNK_ANMF or
// WEBP_CHUNK_FRGM; use WebPMuxSetImage() for a single image.
WebPMuxAnimDispose dispose_method; // Disposal method for the frame.
uint32_t pad[2]; // padding for later use
};
// Sets the (non-animated and non-fragmented) image in the mux object.
// Note: Any existing images (including frames/fragments) will be removed.
// Parameters:
// mux - (in/out) object from which color profile is to be deleted
// mux - (in/out) object in which the image is to be set
// bitstream - (in) can either be a raw VP8/VP8L bitstream or a single-image
// WebP file (non-animated and non-fragmented)
// copy_data - (in) value 1 indicates given data WILL be copied to the mux
// and value 0 indicates data will NOT be copied.
// Returns:
// WEBP_MUX_INVALID_ARGUMENT - if mux is NULL
// WEBP_MUX_NOT_FOUND - If mux does not contain color profile.
// WEBP_MUX_INVALID_ARGUMENT - if mux is NULL or bitstream is NULL.
// WEBP_MUX_MEMORY_ERROR - on memory allocation error.
// WEBP_MUX_OK - on success.
WEBP_EXTERN(WebPMuxError) WebPMuxDeleteColorProfile(WebPMux* mux);
//------------------------------------------------------------------------------
// Animation.
// Adds an animation frame at the end of the mux object.
// Note: as WebP only supports even offsets, any odd offset will be snapped to
// an even location using: offset &= ~1
WEBP_EXTERN(WebPMuxError) WebPMuxSetImage(
WebPMux* mux, const WebPData* bitstream, int copy_data);
// Adds a frame at the end of the mux object.
// Notes: (1) frame.id should be one of WEBP_CHUNK_ANMF or WEBP_CHUNK_FRGM
// (2) For setting a non-animated non-fragmented image, use
// WebPMuxSetImage() instead.
// (3) Type of frame being pushed must be same as the frames in mux.
// (4) As WebP only supports even offsets, any odd offset will be snapped
// to an even location using: offset &= ~1
// Parameters:
// mux - (in/out) object to which an animation frame is to be added
// bitstream - (in) the image data corresponding to the frame. It can either
// be a raw VP8/VP8L bitstream or a single-image WebP file
// (non-animated and non-tiled)
// x_offset - (in) x-offset of the frame to be added
// y_offset - (in) y-offset of the frame to be added
// duration - (in) duration of the frame to be added (in milliseconds)
// copy_data - (in) value 1 indicates given data WILL copied to the mux, and
// value 0 indicates data will NOT be copied.
// mux - (in/out) object to which the frame is to be added
// frame - (in) frame data.
// copy_data - (in) value 1 indicates given data WILL be copied to the mux
// and value 0 indicates data will NOT be copied.
// Returns:
// WEBP_MUX_INVALID_ARGUMENT - if mux is NULL or bitstream is NULL
// WEBP_MUX_INVALID_ARGUMENT - if mux or frame is NULL
// or if content of 'frame' is invalid.
// WEBP_MUX_MEMORY_ERROR - on memory allocation error.
// WEBP_MUX_OK - on success.
WEBP_EXTERN(WebPMuxError) WebPMuxPushFrame(
WebPMux* mux, const WebPData* bitstream,
int x_offset, int y_offset, int duration, int copy_data);
// TODO(urvang): Create a struct as follows to reduce argument list size:
// typedef struct {
// WebPData bitstream;
// int x_offset, y_offset;
// int duration;
// } FrameInfo;
// Gets the nth animation frame from the mux object.
// The content of 'bitstream' is allocated using malloc(), and NOT
WebPMux* mux, const WebPMuxFrameInfo* frame, int copy_data);
// Gets the nth frame from the mux object.
// The content of 'frame->bitstream' is allocated using malloc(), and NOT
// owned by the 'mux' object. It MUST be deallocated by the caller by calling
// WebPDataClear().
// nth=0 has a special meaning - last position.
// Parameters:
// mux - (in) object from which the info is to be fetched
// nth - (in) index of the frame in the mux object
// bitstream - (out) the image data
// x_offset - (out) x-offset of the returned frame
// y_offset - (out) y-offset of the returned frame
// duration - (out) duration of the returned frame (in milliseconds)
// frame - (out) data of the returned frame
// Returns:
// WEBP_MUX_INVALID_ARGUMENT - if either mux, bitstream, x_offset,
// y_offset, or duration is NULL
// WEBP_MUX_INVALID_ARGUMENT - if mux or frame is NULL.
// WEBP_MUX_NOT_FOUND - if there are less than nth frames in the mux object.
// WEBP_MUX_BAD_DATA - if nth frame chunk in mux is invalid.
// WEBP_MUX_OK - on success.
WEBP_EXTERN(WebPMuxError) WebPMuxGetFrame(
const WebPMux* mux, uint32_t nth, WebPData* bitstream,
int* x_offset, int* y_offset, int* duration);
const WebPMux* mux, uint32_t nth, WebPMuxFrameInfo* frame);
// Deletes an animation frame from the mux object.
// Deletes a frame from the mux object.
// nth=0 has a special meaning - last position.
// Parameters:
// mux - (in/out) object from which a frame is to be deleted
// nth - (in) The position from which the frame is to be deleted
// Returns:
// WEBP_MUX_INVALID_ARGUMENT - if mux is NULL
// WEBP_MUX_INVALID_ARGUMENT - if mux is NULL.
// WEBP_MUX_NOT_FOUND - If there are less than nth frames in the mux object
// before deletion.
// WEBP_MUX_OK - on success.
WEBP_EXTERN(WebPMuxError) WebPMuxDeleteFrame(WebPMux* mux, uint32_t nth);
// Sets the animation loop count in the mux object. Any existing loop count
// value(s) will be removed.
// Parameters:
// mux - (in/out) object in which loop chunk is to be set/added
// loop_count - (in) animation loop count value.
// Note that loop_count of zero denotes infinite loop.
// Returns:
// WEBP_MUX_INVALID_ARGUMENT - if mux is NULL
// WEBP_MUX_MEMORY_ERROR - on memory allocation error.
// WEBP_MUX_OK - on success.
WEBP_EXTERN(WebPMuxError) WebPMuxSetLoopCount(WebPMux* mux, int loop_count);
// Gets the animation loop count from the mux object.
// Parameters:
// mux - (in) object from which the loop count is to be fetched
// loop_count - (out) the loop_count value present in the LOOP chunk
// Returns:
// WEBP_MUX_INVALID_ARGUMENT - if either of mux or loop_count is NULL
// WEBP_MUX_NOT_FOUND - if loop chunk is not present in mux object.
// WEBP_MUX_OK - on success.
WEBP_EXTERN(WebPMuxError) WebPMuxGetLoopCount(const WebPMux* mux,
int* loop_count);
//------------------------------------------------------------------------------
// Tiling.
// Animation.
// Adds a tile at the end of the mux object.
// Note: as WebP only supports even offsets, any odd offset will be snapped to
// an even location using: offset &= ~1
// Animation parameters.
// Written to the mux object's ANIM chunk by WebPMuxSetAnimationParams() and
// read back from it by WebPMuxGetAnimationParams().
struct WebPMuxAnimParams {
uint32_t bgcolor; // Background color of the canvas stored (in MSB order) as:
// Bits 00 to 07: Alpha.
// Bits 08 to 15: Red.
// Bits 16 to 23: Green.
// Bits 24 to 31: Blue.
int loop_count; // Number of times to repeat the animation [0 = infinite].
};
// Sets the animation parameters in the mux object. Any existing ANIM chunks
// will be removed.
// Parameters:
// mux - (in/out) object to which a tile is to be added.
// bitstream - (in) the image data corresponding to the frame. It can either
// be a raw VP8/VP8L bitstream or a single-image WebP file
// (non-animated and non-tiled)
// x_offset - (in) x-offset of the tile to be added
// y_offset - (in) y-offset of the tile to be added
// copy_data - (in) value 1 indicates given data WILL copied to the mux, and
// value 0 indicates data will NOT be copied.
// mux - (in/out) object in which ANIM chunk is to be set/added
// params - (in) animation parameters.
// Returns:
// WEBP_MUX_INVALID_ARGUMENT - if mux is NULL or bitstream is NULL
// WEBP_MUX_INVALID_ARGUMENT - if either mux or params is NULL
// WEBP_MUX_MEMORY_ERROR - on memory allocation error.
// WEBP_MUX_OK - on success.
WEBP_EXTERN(WebPMuxError) WebPMuxPushTile(
WebPMux* mux, const WebPData* bitstream,
int x_offset, int y_offset, int copy_data);
WEBP_EXTERN(WebPMuxError) WebPMuxSetAnimationParams(
WebPMux* mux, const WebPMuxAnimParams* params);
// Gets the nth tile from the mux object.
// The content of 'bitstream' is allocated using malloc(), and NOT
// owned by the 'mux' object. It MUST be deallocated by the caller by calling
// WebPDataClear().
// nth=0 has a special meaning - last position.
// Gets the animation parameters from the mux object.
// Parameters:
// mux - (in) object from which the info is to be fetched
// nth - (in) index of the tile in the mux object
// bitstream - (out) the image data
// x_offset - (out) x-offset of the returned tile
// y_offset - (out) y-offset of the returned tile
// mux - (in) object from which the animation parameters are to be fetched
// params - (out) animation parameters extracted from the ANIM chunk
// Returns:
// WEBP_MUX_INVALID_ARGUMENT - if either mux, bitstream, x_offset or
// y_offset is NULL
// WEBP_MUX_NOT_FOUND - if there are less than nth tiles in the mux object.
// WEBP_MUX_BAD_DATA - if nth tile chunk in mux is invalid.
// WEBP_MUX_INVALID_ARGUMENT - if either of mux or params is NULL
// WEBP_MUX_NOT_FOUND - if ANIM chunk is not present in mux object.
// WEBP_MUX_OK - on success.
WEBP_EXTERN(WebPMuxError) WebPMuxGetTile(
const WebPMux* mux, uint32_t nth, WebPData* bitstream,
int* x_offset, int* y_offset);
// Deletes a tile from the mux object.
// nth=0 has a special meaning - last position
// Parameters:
// mux - (in/out) object from which a tile is to be deleted
// nth - (in) The position from which the tile is to be deleted
// Returns:
// WEBP_MUX_INVALID_ARGUMENT - if mux is NULL
// WEBP_MUX_NOT_FOUND - If there are less than nth tiles in the mux object
// before deletion.
// WEBP_MUX_OK - on success.
WEBP_EXTERN(WebPMuxError) WebPMuxDeleteTile(WebPMux* mux, uint32_t nth);
WEBP_EXTERN(WebPMuxError) WebPMuxGetAnimationParams(
const WebPMux* mux, WebPMuxAnimParams* params);
//------------------------------------------------------------------------------
// Misc Utilities.
@ -458,143 +346,6 @@ WEBP_EXTERN(WebPMuxError) WebPMuxNumChunks(const WebPMux* mux,
WEBP_EXTERN(WebPMuxError) WebPMuxAssemble(WebPMux* mux,
WebPData* assembled_data);
//------------------------------------------------------------------------------
// Demux API.
// Enables extraction of image and extended format data from WebP files.
#define WEBP_DEMUX_ABI_VERSION 0x0100 // MAJOR(8b) + MINOR(8b)
typedef struct WebPDemuxer WebPDemuxer;
typedef enum {
WEBP_DEMUX_PARSING_HEADER, // Not enough data to parse full header.
WEBP_DEMUX_PARSED_HEADER, // Header parsing complete, data may be available.
WEBP_DEMUX_DONE // Entire file has been parsed.
} WebPDemuxState;
//------------------------------------------------------------------------------
// Life of a Demux object
// Internal, version-checked, entry point
WEBP_EXTERN(WebPDemuxer*) WebPDemuxInternal(
const WebPData*, int, WebPDemuxState*, int);
// Parses the WebP file given by 'data'.
// A complete WebP file must be present in 'data' for the function to succeed.
// Returns a WebPDemuxer object on successful parse, NULL otherwise.
static WEBP_INLINE WebPDemuxer* WebPDemux(const WebPData* data) {
return WebPDemuxInternal(data, 0, NULL, WEBP_DEMUX_ABI_VERSION);
}
// Parses the WebP file given by 'data'.
// If 'state' is non-NULL it will be set to indicate the status of the demuxer.
// Returns a WebPDemuxer object on successful parse, NULL otherwise.
static WEBP_INLINE WebPDemuxer* WebPDemuxPartial(
const WebPData* data, WebPDemuxState* state) {
return WebPDemuxInternal(data, 1, state, WEBP_DEMUX_ABI_VERSION);
}
// Frees memory associated with 'dmux'.
WEBP_EXTERN(void) WebPDemuxDelete(WebPDemuxer* dmux);
//------------------------------------------------------------------------------
// Data/information extraction.
typedef enum {
WEBP_FF_FORMAT_FLAGS, // Extended format flags present in the 'VP8X' chunk.
WEBP_FF_CANVAS_WIDTH,
WEBP_FF_CANVAS_HEIGHT,
WEBP_FF_LOOP_COUNT
} WebPFormatFeature;
// Get the 'feature' value from the 'dmux'.
// NOTE: values are only valid if WebPDemux() was used or WebPDemuxPartial()
// returned a state > WEBP_DEMUX_PARSING_HEADER.
WEBP_EXTERN(uint32_t) WebPDemuxGetI(
const WebPDemuxer* dmux, WebPFormatFeature feature);
//------------------------------------------------------------------------------
// Frame iteration.
typedef struct {
int frame_num_;
int num_frames_;
int tile_num_;
int num_tiles_;
int x_offset_, y_offset_; // offset relative to the canvas.
int width_, height_; // dimensions of this frame or tile.
int duration_; // display duration in milliseconds.
int complete_; // true if 'tile_' contains a full frame. partial images may
// still be decoded with the WebP incremental decoder.
WebPData tile_; // The frame or tile given by 'frame_num_' and 'tile_num_'.
uint32_t pad[4]; // padding for later use
void* private_;
} WebPIterator;
// Retrieves frame 'frame_number' from 'dmux'.
// 'iter->tile_' points to the first tile on return from this function.
// Individual tiles may be extracted using WebPDemuxSetTile().
// Setting 'frame_number' equal to 0 will return the last frame of the image.
// Returns false if 'dmux' is NULL or frame 'frame_number' is not present.
// Call WebPDemuxReleaseIterator() when use of the iterator is complete.
// NOTE: 'dmux' must persist for the lifetime of 'iter'.
WEBP_EXTERN(int) WebPDemuxGetFrame(
const WebPDemuxer* dmux, int frame_number, WebPIterator* iter);
// Sets 'iter->tile_' to point to the next ('iter->frame_num_' + 1) or previous
// ('iter->frame_num_' - 1) frame. These functions do not loop.
// Returns true on success, false otherwise.
WEBP_EXTERN(int) WebPDemuxNextFrame(WebPIterator* iter);
WEBP_EXTERN(int) WebPDemuxPrevFrame(WebPIterator* iter);
// Sets 'iter->tile_' to reflect tile number 'tile_number'.
// Returns true if tile 'tile_number' is present, false otherwise.
WEBP_EXTERN(int) WebPDemuxSelectTile(WebPIterator* iter, int tile_number);
// Releases any memory associated with 'iter'.
// Must be called before destroying the associated WebPDemuxer with
// WebPDemuxDelete().
WEBP_EXTERN(void) WebPDemuxReleaseIterator(WebPIterator* iter);
//------------------------------------------------------------------------------
// Chunk iteration.
typedef struct {
// The current and total number of chunks with the fourcc given to
// WebPDemuxGetChunk().
int chunk_num_;
int num_chunks_;
WebPData chunk_; // The payload of the chunk.
uint32_t pad[6]; // padding for later use
void* private_;
} WebPChunkIterator;
// Retrieves the 'chunk_number' instance of the chunk with id 'fourcc' from
// 'dmux'.
// 'fourcc' is a character array containing the fourcc of the chunk to return,
// e.g., "ICCP", "META", "EXIF", etc.
// Setting 'chunk_number' equal to 0 will return the last chunk in a set.
// Returns true if the chunk is found, false otherwise. Image related chunk
// payloads are accessed through WebPDemuxGetFrame() and related functions.
// Call WebPDemuxReleaseChunkIterator() when use of the iterator is complete.
// NOTE: 'dmux' must persist for the lifetime of the iterator.
WEBP_EXTERN(int) WebPDemuxGetChunk(const WebPDemuxer* dmux,
const char fourcc[4], int chunk_number,
WebPChunkIterator* iter);
// Sets 'iter->chunk_' to point to the next ('iter->chunk_num_' + 1) or previous
// ('iter->chunk_num_' - 1) chunk. These functions do not loop.
// Returns true on success, false otherwise.
WEBP_EXTERN(int) WebPDemuxNextChunk(WebPChunkIterator* iter);
WEBP_EXTERN(int) WebPDemuxPrevChunk(WebPChunkIterator* iter);
// Releases any memory associated with 'iter'.
// Must be called before destroying the associated WebPDemuxer with
// WebPDemuxDelete().
WEBP_EXTERN(void) WebPDemuxReleaseChunkIterator(WebPChunkIterator* iter);
//------------------------------------------------------------------------------
#if defined(__cplusplus) || defined(c_plusplus)

@ -0,0 +1,87 @@
// Copyright 2012 Google Inc. All Rights Reserved.
//
// This code is licensed under the same terms as WebM:
// Software License Agreement: http://www.webmproject.org/license/software/
// Additional IP Rights Grant: http://www.webmproject.org/license/additional/
// -----------------------------------------------------------------------------
//
// Data-types common to the mux and demux libraries.
//
// Author: Urvang (urvang@google.com)
#ifndef WEBP_WEBP_MUX_TYPES_H_
#define WEBP_WEBP_MUX_TYPES_H_
#include <stdlib.h> // free()
#include <string.h> // memset()
#include "./types.h"
#if defined(__cplusplus) || defined(c_plusplus)
extern "C" {
#endif
#if !(defined(__cplusplus) || defined(c_plusplus))
typedef enum WebPFeatureFlags WebPFeatureFlags;
typedef enum WebPMuxAnimDispose WebPMuxAnimDispose;
#endif
// VP8X Feature Flags.
// Every enumerator is a distinct single bit, so several features can be
// combined into (and tested against) one bitmask with bitwise OR / AND.
// NOTE(review): the numeric values are presumably dictated by the VP8X chunk
// layout of the container format -- confirm against the spec before changing.
enum WebPFeatureFlags {
FRAGMENTS_FLAG = 0x00000001,
ANIMATION_FLAG = 0x00000002,
XMP_FLAG = 0x00000004,
EXIF_FLAG = 0x00000008,
ALPHA_FLAG = 0x00000010,
ICCP_FLAG = 0x00000020
};
// Dispose method (animation only). Indicates how the area used by the current
// frame is to be treated before rendering the next frame on the canvas.
// No explicit values are given, so the enumerators are 0 and 1 respectively.
enum WebPMuxAnimDispose {
WEBP_MUX_DISPOSE_NONE, // Do not dispose.
WEBP_MUX_DISPOSE_BACKGROUND // Dispose to background color.
};
// Data type used to describe 'raw' data, e.g., chunk data
// (ICC profile, metadata) and WebP compressed image data.
// The struct itself does not record whether it owns 'bytes': WebPDataClear()
// always calls free() on it, so only call that on data obtained from malloc()
// (e.g. via WebPDataCopy()) or on a zero-initialized object.
typedef struct WebPData WebPData;
struct WebPData {
const uint8_t* bytes;  // start of the data; NULL after WebPDataInit().
size_t size;           // number of bytes available at 'bytes'.
};
// Resets 'webp_data' to its default (all-zero) state: no data, zero size.
// A NULL 'webp_data' is accepted and ignored.
static WEBP_INLINE void WebPDataInit(WebPData* webp_data) {
  if (webp_data == NULL) return;
  memset(webp_data, 0, sizeof(*webp_data));
}
// Releases the buffer held by 'webp_data' with free() and resets the object
// to its default state. The object itself is not deallocated.
// A NULL 'webp_data' is accepted and ignored.
static WEBP_INLINE void WebPDataClear(WebPData* webp_data) {
  if (webp_data == NULL) return;
  // 'bytes' is declared const; the cast is only there to hand it to free().
  free((void*)webp_data->bytes);
  WebPDataInit(webp_data);
}
// Makes 'dst' a freshly malloc()'ed duplicate of 'src'.
// 'dst' is always reset first; if 'src' holds no data (NULL pointer or zero
// size) 'dst' is simply left empty and the call still succeeds.
// Returns true on success, false if either argument is NULL or if the
// allocation fails (in which case 'dst' is left empty).
static WEBP_INLINE int WebPDataCopy(const WebPData* src, WebPData* dst) {
  uint8_t* buffer;
  if (src == NULL || dst == NULL) return 0;

  WebPDataInit(dst);
  if (src->bytes == NULL || src->size == 0) return 1;  // nothing to duplicate

  buffer = (uint8_t*)malloc(src->size);
  if (buffer == NULL) return 0;
  memcpy(buffer, src->bytes, src->size);

  dst->bytes = buffer;
  dst->size = src->size;
  return 1;
}
#if defined(__cplusplus) || defined(c_plusplus)
} // extern "C"
#endif
#endif /* WEBP_WEBP_MUX_TYPES_H_ */
Loading…
Cancel
Save