mirror of https://github.com/opencv/opencv.git
commit
8bde6aea4b
137 changed files with 9404 additions and 4466 deletions
@ -0,0 +1,149 @@ |
||||
|
||||
/* palette_neon_intrinsics.c - NEON optimised palette expansion functions
|
||||
* |
||||
* Copyright (c) 2018 Cosmin Truta |
||||
* Copyright (c) 2017-2018 Arm Holdings. All rights reserved. |
||||
* Written by Richard Townsend <Richard.Townsend@arm.com>, February 2017. |
||||
* |
||||
* This code is released under the libpng license. |
||||
* For conditions of distribution and use, see the disclaimer |
||||
* and license in png.h |
||||
*/ |
||||
|
||||
#include "../pngpriv.h" |
||||
|
||||
#if PNG_ARM_NEON_IMPLEMENTATION == 1 |
||||
|
||||
#if defined(_MSC_VER) && defined(_M_ARM64) |
||||
# include <arm64_neon.h> |
||||
#else |
||||
# include <arm_neon.h> |
||||
#endif |
||||
|
||||
/* Build an RGBA palette from the RGB and separate alpha palettes. */ |
||||
void |
||||
png_riffle_palette_rgba(png_structrp png_ptr, png_row_infop row_info) |
||||
{ |
||||
png_const_colorp palette = png_ptr->palette; |
||||
png_bytep riffled_palette = png_ptr->riffled_palette; |
||||
png_const_bytep trans_alpha = png_ptr->trans_alpha; |
||||
int num_trans = png_ptr->num_trans; |
||||
int i; |
||||
|
||||
/* Initially black, opaque. */ |
||||
uint8x16x4_t w = {{ |
||||
vdupq_n_u8(0x00), |
||||
vdupq_n_u8(0x00), |
||||
vdupq_n_u8(0x00), |
||||
vdupq_n_u8(0xff), |
||||
}}; |
||||
|
||||
if (row_info->bit_depth != 8) |
||||
{ |
||||
png_error(png_ptr, "bit_depth must be 8 for png_riffle_palette_rgba"); |
||||
return; |
||||
} |
||||
|
||||
/* First, riffle the RGB colours into a RGBA palette, the A value is
|
||||
* set to opaque for now. |
||||
*/ |
||||
for (i = 0; i < (1 << row_info->bit_depth); i += 16) |
||||
{ |
||||
uint8x16x3_t v = vld3q_u8((png_const_bytep)(palette + i)); |
||||
w.val[0] = v.val[0]; |
||||
w.val[1] = v.val[1]; |
||||
w.val[2] = v.val[2]; |
||||
vst4q_u8(riffled_palette + (i << 2), w); |
||||
} |
||||
|
||||
/* Fix up the missing transparency values. */ |
||||
for (i = 0; i < num_trans; i++) |
||||
riffled_palette[(i << 2) + 3] = trans_alpha[i]; |
||||
} |
||||
|
||||
/* Expands a palettized row into RGBA. */ |
||||
int |
||||
png_do_expand_palette_neon_rgba(png_structrp png_ptr, png_row_infop row_info, |
||||
png_const_bytep row, png_bytepp ssp, png_bytepp ddp) |
||||
{ |
||||
png_uint_32 row_width = row_info->width; |
||||
const png_uint_32 *riffled_palette = |
||||
(const png_uint_32 *)png_ptr->riffled_palette; |
||||
const png_int_32 pixels_per_chunk = 4; |
||||
int i; |
||||
|
||||
if (row_width < pixels_per_chunk) |
||||
return 0; |
||||
|
||||
/* This function originally gets the last byte of the output row.
|
||||
* The NEON part writes forward from a given position, so we have |
||||
* to seek this back by 4 pixels x 4 bytes. |
||||
*/ |
||||
*ddp = *ddp - ((pixels_per_chunk * sizeof(png_uint_32)) - 1); |
||||
|
||||
for (i = 0; i < row_width; i += pixels_per_chunk) |
||||
{ |
||||
uint32x4_t cur; |
||||
png_bytep sp = *ssp - i, dp = *ddp - (i << 2); |
||||
cur = vld1q_dup_u32 (riffled_palette + *(sp - 3)); |
||||
cur = vld1q_lane_u32(riffled_palette + *(sp - 2), cur, 1); |
||||
cur = vld1q_lane_u32(riffled_palette + *(sp - 1), cur, 2); |
||||
cur = vld1q_lane_u32(riffled_palette + *(sp - 0), cur, 3); |
||||
vst1q_u32((void *)dp, cur); |
||||
} |
||||
if (i != row_width) |
||||
{ |
||||
/* Remove the amount that wasn't processed. */ |
||||
i -= pixels_per_chunk; |
||||
} |
||||
|
||||
/* Decrement output pointers. */ |
||||
*ssp = *ssp - i; |
||||
*ddp = *ddp - (i << 2); |
||||
return i; |
||||
} |
||||
|
||||
/* Expands a palettized row into RGB format. */ |
||||
int |
||||
png_do_expand_palette_neon_rgb(png_structrp png_ptr, png_row_infop row_info, |
||||
png_const_bytep row, png_bytepp ssp, png_bytepp ddp) |
||||
{ |
||||
png_uint_32 row_width = row_info->width; |
||||
png_const_bytep palette = (png_const_bytep)png_ptr->palette; |
||||
const png_uint_32 pixels_per_chunk = 8; |
||||
int i; |
||||
|
||||
if (row_width <= pixels_per_chunk) |
||||
return 0; |
||||
|
||||
/* Seeking this back by 8 pixels x 3 bytes. */ |
||||
*ddp = *ddp - ((pixels_per_chunk * sizeof(png_color)) - 1); |
||||
|
||||
for (i = 0; i < row_width; i += pixels_per_chunk) |
||||
{ |
||||
uint8x8x3_t cur; |
||||
png_bytep sp = *ssp - i, dp = *ddp - ((i << 1) + i); |
||||
cur = vld3_dup_u8(palette + sizeof(png_color) * (*(sp - 7))); |
||||
cur = vld3_lane_u8(palette + sizeof(png_color) * (*(sp - 6)), cur, 1); |
||||
cur = vld3_lane_u8(palette + sizeof(png_color) * (*(sp - 5)), cur, 2); |
||||
cur = vld3_lane_u8(palette + sizeof(png_color) * (*(sp - 4)), cur, 3); |
||||
cur = vld3_lane_u8(palette + sizeof(png_color) * (*(sp - 3)), cur, 4); |
||||
cur = vld3_lane_u8(palette + sizeof(png_color) * (*(sp - 2)), cur, 5); |
||||
cur = vld3_lane_u8(palette + sizeof(png_color) * (*(sp - 1)), cur, 6); |
||||
cur = vld3_lane_u8(palette + sizeof(png_color) * (*(sp - 0)), cur, 7); |
||||
vst3_u8((void *)dp, cur); |
||||
} |
||||
|
||||
if (i != row_width) |
||||
{ |
||||
/* Remove the amount that wasn't processed. */ |
||||
i -= pixels_per_chunk; |
||||
} |
||||
|
||||
/* Decrement output pointers. */ |
||||
*ssp = *ssp - i; |
||||
*ddp = *ddp - ((i << 1) + i); |
||||
return i; |
||||
} |
||||
|
||||
#endif /* PNG_ARM_NEON_IMPLEMENTATION */ |
File diff suppressed because it is too large
Load Diff
@ -0,0 +1,67 @@ |
||||
/*
|
||||
* Copyright (c) 2009 Frank Warmerdam |
||||
* |
||||
* Permission to use, copy, modify, distribute, and sell this software and
|
||||
* its documentation for any purpose is hereby granted without fee, provided |
||||
* that (i) the above copyright notices and this permission notice appear in |
||||
* all copies of the software and related documentation, and (ii) the names of |
||||
* Sam Leffler and Silicon Graphics may not be used in any advertising or |
||||
* publicity relating to the software without the specific, prior written |
||||
* permission of Sam Leffler and Silicon Graphics. |
||||
*
|
||||
* THE SOFTWARE IS PROVIDED "AS-IS" AND WITHOUT WARRANTY OF ANY KIND,
|
||||
* EXPRESS, IMPLIED OR OTHERWISE, INCLUDING WITHOUT LIMITATION, ANY
|
||||
* WARRANTY OF MERCHANTABILITY OR FITNESS FOR A PARTICULAR PURPOSE.
|
||||
*
|
||||
* IN NO EVENT SHALL SAM LEFFLER OR SILICON GRAPHICS BE LIABLE FOR |
||||
* ANY SPECIAL, INCIDENTAL, INDIRECT OR CONSEQUENTIAL DAMAGES OF ANY KIND, |
||||
* OR ANY DAMAGES WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, |
||||
* WHETHER OR NOT ADVISED OF THE POSSIBILITY OF DAMAGE, AND ON ANY THEORY OF |
||||
* LIABILITY, ARISING OUT OF OR IN CONNECTION WITH THE USE OR PERFORMANCE |
||||
* OF THIS SOFTWARE. |
||||
*/ |
||||
|
||||
#ifndef _LIBPORT_
#define _LIBPORT_

/*
 * Prototypes for C library routines that may be absent on the host.
 * NOTE(review): presumably the matching definitions come from a small
 * portability library built alongside the tools -- confirm against the
 * build scripts.
 */

/* Command-line option parsing and its global state. */
int getopt(int argc, char * const argv[], const char *optstring);
extern   char *optarg;
extern   int opterr;
extern   int optind;
extern   int optopt;

/* Case-insensitive string comparison. */
int strcasecmp(const char *s1, const char *s2);

#ifndef HAVE_GETOPT
#  define HAVE_GETOPT 1
#endif

/* String-to-integer conversions, each gated by its own HAVE_* switch. */
#if HAVE_STRTOL
long strtol(const char *nptr, char **endptr, int base);
#endif
#if HAVE_STRTOLL
long long strtoll(const char *nptr, char **endptr, int base);
#endif
#if HAVE_STRTOUL
unsigned long strtoul(const char *nptr, char **endptr, int base);
#endif
#if HAVE_STRTOULL
unsigned long long strtoull(const char *nptr, char **endptr, int base);
#endif

/* Disabled declaration, kept for reference. */
#if 0
void *
lfind(const void *key, const void *base, size_t *nmemb, size_t size,
      int(*compar)(const void *, const void *));
#endif

/*
 * Without a native snprintf the names are redirected to the _TIFF_*
 * replacements; the prototype below therefore declares _TIFF_snprintf_f.
 */
#if !defined(HAVE_SNPRINTF)
#undef vsnprintf
#define vsnprintf _TIFF_vsnprintf_f

#undef snprintf
#define snprintf _TIFF_snprintf_f
int snprintf(char* str, size_t size, const char* format, ...);
#endif

#endif /* ndef _LIBPORT_ */
@ -0,0 +1,684 @@ |
||||
/*
|
||||
* Copyright (c) 2018, Mapbox |
||||
* Author: <norman.barker at mapbox.com> |
||||
* |
||||
* Permission to use, copy, modify, distribute, and sell this software and |
||||
* its documentation for any purpose is hereby granted without fee, provided |
||||
* that (i) the above copyright notices and this permission notice appear in |
||||
* all copies of the software and related documentation, and (ii) the names of |
||||
* Sam Leffler and Silicon Graphics may not be used in any advertising or |
||||
* publicity relating to the software without the specific, prior written |
||||
* permission of Sam Leffler and Silicon Graphics. |
||||
* |
||||
* THE SOFTWARE IS PROVIDED "AS-IS" AND WITHOUT WARRANTY OF ANY KIND, |
||||
* EXPRESS, IMPLIED OR OTHERWISE, INCLUDING WITHOUT LIMITATION, ANY |
||||
* WARRANTY OF MERCHANTABILITY OR FITNESS FOR A PARTICULAR PURPOSE. |
||||
* |
||||
* IN NO EVENT SHALL SAM LEFFLER OR SILICON GRAPHICS BE LIABLE FOR |
||||
* ANY SPECIAL, INCIDENTAL, INDIRECT OR CONSEQUENTIAL DAMAGES OF ANY KIND, |
||||
* OR ANY DAMAGES WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, |
||||
* WHETHER OR NOT ADVISED OF THE POSSIBILITY OF DAMAGE, AND ON ANY THEORY OF |
||||
* LIABILITY, ARISING OUT OF OR IN CONNECTION WITH THE USE OR PERFORMANCE |
||||
* OF THIS SOFTWARE. |
||||
*/ |
||||
|
||||
#include "tiffiop.h" |
||||
#ifdef WEBP_SUPPORT |
||||
/*
|
||||
* TIFF Library. |
||||
* |
||||
* WEBP Compression Support |
||||
* |
||||
*/ |
||||
|
||||
#include "webp/decode.h" |
||||
#include "webp/encode.h" |
||||
|
||||
#include <stdio.h> |
||||
|
||||
#define LSTATE_INIT_DECODE 0x01 |
||||
#define LSTATE_INIT_ENCODE 0x02 |
||||
/*
|
||||
* State block for each open TIFF |
||||
* file using WEBP compression/decompression. |
||||
*/ |
||||
typedef struct { |
||||
uint16 nSamples; /* number of samples per pixel */ |
||||
|
||||
int lossless; /* lossy/lossless compression */ |
||||
int quality_level; /* compression level */ |
||||
WebPPicture sPicture; /* WebP Picture */ |
||||
WebPConfig sEncoderConfig; /* WebP encoder config */ |
||||
uint8* pBuffer; /* buffer to hold raw data on encoding */ |
||||
unsigned int buffer_offset; /* current offset into the buffer */ |
||||
unsigned int buffer_size; |
||||
|
||||
WebPIDecoder* psDecoder; /* WebPIDecoder */ |
||||
WebPDecBuffer sDecBuffer; /* Decoder buffer */ |
||||
int last_y; /* Last row decoded */ |
||||
|
||||
int state; /* state flags */ |
||||
|
||||
TIFFVGetMethod vgetparent; /* super-class method */ |
||||
TIFFVSetMethod vsetparent; /* super-class method */ |
||||
} WebPState; |
||||
|
||||
#define LState(tif) ((WebPState*) (tif)->tif_data) |
||||
#define DecoderState(tif) LState(tif) |
||||
#define EncoderState(tif) LState(tif) |
||||
|
||||
static int TWebPEncode(TIFF* tif, uint8* bp, tmsize_t cc, uint16 s); |
||||
static int TWebPDecode(TIFF* tif, uint8* op, tmsize_t occ, uint16 s); |
||||
|
||||
static |
||||
int TWebPDatasetWriter(const uint8_t* data, size_t data_size, |
||||
const WebPPicture* const picture) |
||||
{ |
||||
static const char module[] = "TWebPDatasetWriter"; |
||||
TIFF* tif = (TIFF*)(picture->custom_ptr); |
||||
|
||||
if ( (tif->tif_rawcc + (tmsize_t)data_size) > tif->tif_rawdatasize ) { |
||||
TIFFErrorExt(tif->tif_clientdata, module, |
||||
"Buffer too small by " TIFF_SIZE_FORMAT " bytes.", |
||||
(size_t) (tif->tif_rawcc + data_size - tif->tif_rawdatasize)); |
||||
return 0; |
||||
} else { |
||||
_TIFFmemcpy(tif->tif_rawcp, data, data_size); |
||||
tif->tif_rawcc += data_size; |
||||
tif->tif_rawcp += data_size; |
||||
return 1;
|
||||
} |
||||
} |
||||
|
||||
/*
|
||||
* Encode a chunk of pixels. |
||||
*/ |
||||
static int |
||||
TWebPEncode(TIFF* tif, uint8* bp, tmsize_t cc, uint16 s) |
||||
{ |
||||
static const char module[] = "TWebPEncode"; |
||||
WebPState *sp = EncoderState(tif); |
||||
(void) s; |
||||
|
||||
assert(sp != NULL); |
||||
assert(sp->state == LSTATE_INIT_ENCODE); |
||||
|
||||
if( (uint64)sp->buffer_offset + |
||||
(uint64)cc > sp->buffer_size ) |
||||
{ |
||||
TIFFErrorExt(tif->tif_clientdata, module, |
||||
"Too many bytes to be written"); |
||||
return 0; |
||||
} |
||||
|
||||
memcpy(sp->pBuffer + sp->buffer_offset, |
||||
bp, cc); |
||||
sp->buffer_offset += (unsigned)cc; |
||||
|
||||
return 1; |
||||
|
||||
} |
||||
|
||||
static int |
||||
TWebPDecode(TIFF* tif, uint8* op, tmsize_t occ, uint16 s) |
||||
{ |
||||
static const char module[] = "WebPDecode"; |
||||
VP8StatusCode status = VP8_STATUS_OK; |
||||
WebPState *sp = DecoderState(tif); |
||||
(void) s;
|
||||
|
||||
assert(sp != NULL); |
||||
assert(sp->state == LSTATE_INIT_DECODE); |
||||
|
||||
if (occ % sp->sDecBuffer.u.RGBA.stride) |
||||
{ |
||||
TIFFErrorExt(tif->tif_clientdata, module, |
||||
"Fractional scanlines cannot be read"); |
||||
return 0; |
||||
} |
||||
|
||||
status = WebPIAppend(sp->psDecoder, tif->tif_rawcp, tif->tif_rawcc); |
||||
|
||||
if (status != VP8_STATUS_OK && status != VP8_STATUS_SUSPENDED) { |
||||
if (status == VP8_STATUS_INVALID_PARAM) { |
||||
TIFFErrorExt(tif->tif_clientdata, module, |
||||
"Invalid parameter used.");
|
||||
} else if (status == VP8_STATUS_OUT_OF_MEMORY) { |
||||
TIFFErrorExt(tif->tif_clientdata, module, |
||||
"Out of memory.");
|
||||
} else { |
||||
TIFFErrorExt(tif->tif_clientdata, module, |
||||
"Unrecognized error.");
|
||||
} |
||||
return 0; |
||||
} else { |
||||
int current_y, stride; |
||||
uint8_t* buf; |
||||
|
||||
/* Returns the RGB/A image decoded so far */ |
||||
buf = WebPIDecGetRGB(sp->psDecoder, ¤t_y, NULL, NULL, &stride); |
||||
|
||||
if ((buf != NULL) && |
||||
(occ <= stride * (current_y - sp->last_y))) { |
||||
memcpy(op,
|
||||
buf + (sp->last_y * stride), |
||||
occ); |
||||
|
||||
tif->tif_rawcp += tif->tif_rawcc; |
||||
tif->tif_rawcc = 0; |
||||
sp->last_y += occ / sp->sDecBuffer.u.RGBA.stride; |
||||
return 1; |
||||
} else { |
||||
TIFFErrorExt(tif->tif_clientdata, module, "Unable to decode WebP data.");
|
||||
return 0; |
||||
} |
||||
} |
||||
} |
||||
|
||||
static int |
||||
TWebPFixupTags(TIFF* tif) |
||||
{ |
||||
(void) tif; |
||||
if (tif->tif_dir.td_planarconfig != PLANARCONFIG_CONTIG) { |
||||
static const char module[] = "TWebPFixupTags"; |
||||
TIFFErrorExt(tif->tif_clientdata, module, |
||||
"TIFF WEBP requires data to be stored contiguously in RGB e.g. RGBRGBRGB " |
||||
#if WEBP_ENCODER_ABI_VERSION >= 0x0100 |
||||
"or RGBARGBARGBA" |
||||
#endif |
||||
); |
||||
return 0; |
||||
} |
||||
return 1; |
||||
} |
||||
|
||||
static int |
||||
TWebPSetupDecode(TIFF* tif) |
||||
{ |
||||
static const char module[] = "WebPSetupDecode"; |
||||
uint16 nBitsPerSample = tif->tif_dir.td_bitspersample; |
||||
uint16 sampleFormat = tif->tif_dir.td_sampleformat; |
||||
|
||||
WebPState* sp = DecoderState(tif); |
||||
assert(sp != NULL); |
||||
|
||||
sp->nSamples = tif->tif_dir.td_samplesperpixel; |
||||
|
||||
/* check band count */ |
||||
if ( sp->nSamples != 3 |
||||
#if WEBP_ENCODER_ABI_VERSION >= 0x0100 |
||||
&& sp->nSamples != 4 |
||||
#endif |
||||
) |
||||
{ |
||||
TIFFErrorExt(tif->tif_clientdata, module, |
||||
"WEBP driver doesn't support %d bands. Must be 3 (RGB) " |
||||
#if WEBP_ENCODER_ABI_VERSION >= 0x0100 |
||||
"or 4 (RGBA) " |
||||
#endif |
||||
"bands.", |
||||
sp->nSamples ); |
||||
return 0; |
||||
} |
||||
|
||||
/* check bits per sample and data type */ |
||||
if ((nBitsPerSample != 8) && (sampleFormat != 1)) { |
||||
TIFFErrorExt(tif->tif_clientdata, module, |
||||
"WEBP driver requires 8 bit unsigned data"); |
||||
return 0; |
||||
} |
||||
|
||||
/* if we were last encoding, terminate this mode */ |
||||
if (sp->state & LSTATE_INIT_ENCODE) { |
||||
WebPPictureFree(&sp->sPicture); |
||||
if (sp->pBuffer != NULL) { |
||||
_TIFFfree(sp->pBuffer); |
||||
sp->pBuffer = NULL; |
||||
} |
||||
sp->buffer_offset = 0; |
||||
sp->state = 0; |
||||
} |
||||
|
||||
sp->state |= LSTATE_INIT_DECODE; |
||||
|
||||
return 1; |
||||
} |
||||
|
||||
/*
|
||||
* Setup state for decoding a strip. |
||||
*/ |
||||
static int |
||||
TWebPPreDecode(TIFF* tif, uint16 s) |
||||
{ |
||||
static const char module[] = "TWebPPreDecode"; |
||||
uint32 segment_width, segment_height; |
||||
WebPState* sp = DecoderState(tif); |
||||
TIFFDirectory* td = &tif->tif_dir; |
||||
(void) s; |
||||
assert(sp != NULL); |
||||
|
||||
if (isTiled(tif)) { |
||||
segment_width = td->td_tilewidth; |
||||
segment_height = td->td_tilelength; |
||||
} else { |
||||
segment_width = td->td_imagewidth; |
||||
segment_height = td->td_imagelength - tif->tif_row; |
||||
if (segment_height > td->td_rowsperstrip) |
||||
segment_height = td->td_rowsperstrip; |
||||
} |
||||
|
||||
if( (sp->state & LSTATE_INIT_DECODE) == 0 ) |
||||
tif->tif_setupdecode(tif); |
||||
|
||||
if (sp->psDecoder != NULL) { |
||||
WebPIDelete(sp->psDecoder); |
||||
WebPFreeDecBuffer(&sp->sDecBuffer); |
||||
sp->psDecoder = NULL; |
||||
} |
||||
|
||||
sp->last_y = 0; |
||||
|
||||
WebPInitDecBuffer(&sp->sDecBuffer); |
||||
|
||||
sp->sDecBuffer.is_external_memory = 0; |
||||
sp->sDecBuffer.width = segment_width; |
||||
sp->sDecBuffer.height = segment_height; |
||||
sp->sDecBuffer.u.RGBA.stride = segment_width * sp->nSamples; |
||||
sp->sDecBuffer.u.RGBA.size = segment_width * sp->nSamples * segment_height; |
||||
|
||||
if (sp->nSamples > 3) { |
||||
sp->sDecBuffer.colorspace = MODE_RGBA; |
||||
} else { |
||||
sp->sDecBuffer.colorspace = MODE_RGB; |
||||
} |
||||
|
||||
sp->psDecoder = WebPINewDecoder(&sp->sDecBuffer); |
||||
|
||||
if (sp->psDecoder == NULL) { |
||||
TIFFErrorExt(tif->tif_clientdata, module, |
||||
"Unable to allocate WebP decoder."); |
||||
return 0; |
||||
} |
||||
|
||||
return 1; |
||||
} |
||||
|
||||
static int |
||||
TWebPSetupEncode(TIFF* tif) |
||||
{ |
||||
static const char module[] = "WebPSetupEncode"; |
||||
uint16 nBitsPerSample = tif->tif_dir.td_bitspersample; |
||||
uint16 sampleFormat = tif->tif_dir.td_sampleformat; |
||||
|
||||
WebPState* sp = EncoderState(tif); |
||||
assert(sp != NULL); |
||||
|
||||
sp->nSamples = tif->tif_dir.td_samplesperpixel; |
||||
|
||||
/* check band count */ |
||||
if ( sp->nSamples != 3 |
||||
#if WEBP_ENCODER_ABI_VERSION >= 0x0100 |
||||
&& sp->nSamples != 4 |
||||
#endif |
||||
) |
||||
{ |
||||
TIFFErrorExt(tif->tif_clientdata, module, |
||||
"WEBP driver doesn't support %d bands. Must be 3 (RGB) " |
||||
#if WEBP_ENCODER_ABI_VERSION >= 0x0100 |
||||
"or 4 (RGBA) " |
||||
#endif |
||||
"bands.", |
||||
sp->nSamples ); |
||||
return 0; |
||||
} |
||||
|
||||
/* check bits per sample and data type */ |
||||
if ((nBitsPerSample != 8) && (sampleFormat != 1)) { |
||||
TIFFErrorExt(tif->tif_clientdata, module, |
||||
"WEBP driver requires 8 bit unsigned data"); |
||||
return 0; |
||||
} |
||||
|
||||
if (sp->state & LSTATE_INIT_DECODE) { |
||||
WebPIDelete(sp->psDecoder); |
||||
WebPFreeDecBuffer(&sp->sDecBuffer); |
||||
sp->psDecoder = NULL; |
||||
sp->last_y = 0; |
||||
sp->state = 0; |
||||
} |
||||
|
||||
sp->state |= LSTATE_INIT_ENCODE; |
||||
|
||||
if (!WebPConfigInitInternal(&sp->sEncoderConfig, WEBP_PRESET_DEFAULT, |
||||
sp->quality_level, |
||||
WEBP_ENCODER_ABI_VERSION)) { |
||||
TIFFErrorExt(tif->tif_clientdata, module, |
||||
"Error creating WebP encoder configuration."); |
||||
return 0; |
||||
} |
||||
|
||||
#if WEBP_ENCODER_ABI_VERSION >= 0x0100 |
||||
sp->sEncoderConfig.lossless = sp->lossless; |
||||
#endif |
||||
|
||||
if (!WebPValidateConfig(&sp->sEncoderConfig)) { |
||||
TIFFErrorExt(tif->tif_clientdata, module, |
||||
"Error with WebP encoder configuration."); |
||||
return 0; |
||||
} |
||||
|
||||
if (!WebPPictureInit(&sp->sPicture)) { |
||||
TIFFErrorExt(tif->tif_clientdata, module, |
||||
"Error initializing WebP picture."); |
||||
return 0; |
||||
} |
||||
|
||||
return 1; |
||||
} |
||||
|
||||
/*
|
||||
* Reset encoding state at the start of a strip. |
||||
*/ |
||||
static int |
||||
TWebPPreEncode(TIFF* tif, uint16 s) |
||||
{ |
||||
static const char module[] = "TWebPPreEncode"; |
||||
uint32 segment_width, segment_height; |
||||
WebPState *sp = EncoderState(tif); |
||||
TIFFDirectory* td = &tif->tif_dir; |
||||
|
||||
(void) s; |
||||
|
||||
assert(sp != NULL); |
||||
if( sp->state != LSTATE_INIT_ENCODE ) |
||||
tif->tif_setupencode(tif); |
||||
|
||||
/*
|
||||
* Set encoding parameters for this strip/tile. |
||||
*/ |
||||
if (isTiled(tif)) { |
||||
segment_width = td->td_tilewidth; |
||||
segment_height = td->td_tilelength; |
||||
} else { |
||||
segment_width = td->td_imagewidth; |
||||
segment_height = td->td_imagelength - tif->tif_row; |
||||
if (segment_height > td->td_rowsperstrip) |
||||
segment_height = td->td_rowsperstrip; |
||||
} |
||||
|
||||
if( segment_width > 16383 || segment_height > 16383 ) { |
||||
TIFFErrorExt(tif->tif_clientdata, module,
|
||||
"WEBP maximum image dimensions are 16383 x 16383."); |
||||
return 0; |
||||
} |
||||
|
||||
/* set up buffer for raw data */ |
||||
/* given above check and that nSamples <= 4, buffer_size is <= 1 GB */ |
||||
sp->buffer_size = segment_width * segment_height * sp->nSamples; |
||||
sp->pBuffer = _TIFFmalloc(sp->buffer_size); |
||||
if( !sp->pBuffer) { |
||||
TIFFErrorExt(tif->tif_clientdata, module, "Cannot allocate buffer"); |
||||
return 0; |
||||
} |
||||
sp->buffer_offset = 0; |
||||
|
||||
sp->sPicture.width = segment_width; |
||||
sp->sPicture.height = segment_height; |
||||
sp->sPicture.writer = TWebPDatasetWriter; |
||||
sp->sPicture.custom_ptr = tif; |
||||
|
||||
return 1; |
||||
} |
||||
|
||||
/*
|
||||
* Finish off an encoded strip by flushing it. |
||||
*/ |
||||
static int |
||||
TWebPPostEncode(TIFF* tif) |
||||
{ |
||||
static const char module[] = "WebPPostEncode"; |
||||
int64_t stride; |
||||
WebPState *sp = EncoderState(tif); |
||||
assert(sp != NULL); |
||||
|
||||
assert(sp->state == LSTATE_INIT_ENCODE); |
||||
|
||||
stride = (int64_t)sp->sPicture.width * sp->nSamples; |
||||
|
||||
#if WEBP_ENCODER_ABI_VERSION >= 0x0100 |
||||
if (sp->nSamples == 4) { |
||||
if (!WebPPictureImportRGBA(&sp->sPicture, sp->pBuffer, (int)stride)) { |
||||
TIFFErrorExt(tif->tif_clientdata, module, |
||||
"WebPPictureImportRGBA() failed" ); |
||||
return 0; |
||||
} |
||||
} |
||||
else |
||||
#endif |
||||
if (!WebPPictureImportRGB(&sp->sPicture, sp->pBuffer, (int)stride)) { |
||||
TIFFErrorExt(tif->tif_clientdata, module, |
||||
"WebPPictureImportRGB() failed"); |
||||
return 0; |
||||
} |
||||
|
||||
if (!WebPEncode(&sp->sEncoderConfig, &sp->sPicture)) { |
||||
|
||||
#if WEBP_ENCODER_ABI_VERSION >= 0x0100 |
||||
const char* pszErrorMsg = NULL; |
||||
switch(sp->sPicture.error_code) { |
||||
case VP8_ENC_ERROR_OUT_OF_MEMORY: |
||||
pszErrorMsg = "Out of memory"; break; |
||||
case VP8_ENC_ERROR_BITSTREAM_OUT_OF_MEMORY: |
||||
pszErrorMsg = "Out of memory while flushing bits"; break; |
||||
case VP8_ENC_ERROR_NULL_PARAMETER: |
||||
pszErrorMsg = "A pointer parameter is NULL"; break; |
||||
case VP8_ENC_ERROR_INVALID_CONFIGURATION: |
||||
pszErrorMsg = "Configuration is invalid"; break; |
||||
case VP8_ENC_ERROR_BAD_DIMENSION: |
||||
pszErrorMsg = "Picture has invalid width/height"; break; |
||||
case VP8_ENC_ERROR_PARTITION0_OVERFLOW: |
||||
pszErrorMsg = "Partition is bigger than 512k. Try using less " |
||||
"SEGMENTS, or increase PARTITION_LIMIT value"; |
||||
break; |
||||
case VP8_ENC_ERROR_PARTITION_OVERFLOW: |
||||
pszErrorMsg = "Partition is bigger than 16M"; |
||||
break; |
||||
case VP8_ENC_ERROR_BAD_WRITE: |
||||
pszErrorMsg = "Error while fludshing bytes"; break; |
||||
case VP8_ENC_ERROR_FILE_TOO_BIG: |
||||
pszErrorMsg = "File is bigger than 4G"; break; |
||||
case VP8_ENC_ERROR_USER_ABORT: |
||||
pszErrorMsg = "User interrupted"; |
||||
break; |
||||
default: |
||||
TIFFErrorExt(tif->tif_clientdata, module, |
||||
"WebPEncode returned an unknown error code: %d", |
||||
sp->sPicture.error_code); |
||||
pszErrorMsg = "Unknown WebP error type."; |
||||
break; |
||||
} |
||||
TIFFErrorExt(tif->tif_clientdata, module, |
||||
"WebPEncode() failed : %s", pszErrorMsg); |
||||
#else |
||||
TIFFErrorExt(tif->tif_clientdata, module, |
||||
"Error in WebPEncode()"); |
||||
#endif |
||||
return 0; |
||||
} |
||||
|
||||
sp->sPicture.custom_ptr = NULL; |
||||
|
||||
if (!TIFFFlushData1(tif)) |
||||
{ |
||||
TIFFErrorExt(tif->tif_clientdata, module, |
||||
"Error flushing TIFF WebP encoder."); |
||||
return 0; |
||||
} |
||||
|
||||
return 1; |
||||
} |
||||
|
||||
static void |
||||
TWebPCleanup(TIFF* tif) |
||||
{ |
||||
WebPState* sp = LState(tif); |
||||
|
||||
assert(sp != 0); |
||||
|
||||
tif->tif_tagmethods.vgetfield = sp->vgetparent; |
||||
tif->tif_tagmethods.vsetfield = sp->vsetparent; |
||||
|
||||
if (sp->state & LSTATE_INIT_ENCODE) { |
||||
WebPPictureFree(&sp->sPicture); |
||||
} |
||||
|
||||
if (sp->psDecoder != NULL) { |
||||
WebPIDelete(sp->psDecoder); |
||||
WebPFreeDecBuffer(&sp->sDecBuffer); |
||||
sp->psDecoder = NULL; |
||||
sp->last_y = 0; |
||||
} |
||||
|
||||
if (sp->pBuffer != NULL) { |
||||
_TIFFfree(sp->pBuffer); |
||||
sp->pBuffer = NULL; |
||||
} |
||||
|
||||
if (tif->tif_data) { |
||||
_TIFFfree(tif->tif_data); |
||||
tif->tif_data = NULL; |
||||
} |
||||
|
||||
_TIFFSetDefaultCompressionState(tif); |
||||
} |
||||
|
||||
static int |
||||
TWebPVSetField(TIFF* tif, uint32 tag, va_list ap) |
||||
{ |
||||
static const char module[] = "WebPVSetField"; |
||||
WebPState* sp = LState(tif); |
||||
|
||||
switch (tag) { |
||||
case TIFFTAG_WEBP_LEVEL: |
||||
sp->quality_level = (int) va_arg(ap, int); |
||||
if( sp->quality_level <= 0 || |
||||
sp->quality_level > 100.0f ) { |
||||
TIFFWarningExt(tif->tif_clientdata, module, |
||||
"WEBP_LEVEL should be between 1 and 100"); |
||||
} |
||||
return 1; |
||||
case TIFFTAG_WEBP_LOSSLESS: |
||||
#if WEBP_ENCODER_ABI_VERSION >= 0x0100 |
||||
sp->lossless = va_arg(ap, int); |
||||
return 1; |
||||
#else |
||||
TIFFErrorExt(tif->tif_clientdata, module, |
||||
"Need to upgrade WEBP driver, this version doesn't support " |
||||
"lossless compression."); |
||||
return 0; |
||||
#endif |
||||
default: |
||||
return (*sp->vsetparent)(tif, tag, ap); |
||||
} |
||||
/*NOTREACHED*/ |
||||
} |
||||
|
||||
static int |
||||
TWebPVGetField(TIFF* tif, uint32 tag, va_list ap) |
||||
{ |
||||
WebPState* sp = LState(tif); |
||||
|
||||
switch (tag) { |
||||
case TIFFTAG_WEBP_LEVEL: |
||||
*va_arg(ap, int*) = sp->quality_level; |
||||
break; |
||||
case TIFFTAG_WEBP_LOSSLESS: |
||||
*va_arg(ap, int*) = sp->lossless; |
||||
break; |
||||
default: |
||||
return (*sp->vgetparent)(tif, tag, ap); |
||||
} |
||||
return 1; |
||||
} |
||||
|
||||
static const TIFFField TWebPFields[] = { |
||||
{ TIFFTAG_WEBP_LEVEL, 0, 0, TIFF_ANY, 0, TIFF_SETGET_INT, |
||||
TIFF_SETGET_UNDEFINED, |
||||
FIELD_PSEUDO, TRUE, FALSE, "WEBP quality", NULL }, |
||||
{ TIFFTAG_WEBP_LOSSLESS, 0, 0, TIFF_ANY, 0, TIFF_SETGET_INT, |
||||
TIFF_SETGET_UNDEFINED, |
||||
FIELD_PSEUDO, TRUE, FALSE, "WEBP lossless/lossy", NULL |
||||
}, |
||||
}; |
||||
|
||||
int |
||||
TIFFInitWebP(TIFF* tif, int scheme) |
||||
{ |
||||
static const char module[] = "TIFFInitWebP"; |
||||
WebPState* sp; |
||||
|
||||
assert( scheme == COMPRESSION_WEBP ); |
||||
|
||||
/*
|
||||
* Merge codec-specific tag information. |
||||
*/ |
||||
if ( !_TIFFMergeFields(tif, TWebPFields, TIFFArrayCount(TWebPFields)) ) { |
||||
TIFFErrorExt(tif->tif_clientdata, module, |
||||
"Merging WebP codec-specific tags failed"); |
||||
return 0; |
||||
} |
||||
|
||||
/*
|
||||
* Allocate state block so tag methods have storage to record values. |
||||
*/ |
||||
tif->tif_data = (uint8*) _TIFFmalloc(sizeof(WebPState)); |
||||
if (tif->tif_data == NULL) |
||||
goto bad; |
||||
sp = LState(tif); |
||||
|
||||
/*
|
||||
* Override parent get/set field methods. |
||||
*/ |
||||
sp->vgetparent = tif->tif_tagmethods.vgetfield; |
||||
tif->tif_tagmethods.vgetfield = TWebPVGetField; /* hook for codec tags */ |
||||
sp->vsetparent = tif->tif_tagmethods.vsetfield; |
||||
tif->tif_tagmethods.vsetfield = TWebPVSetField; /* hook for codec tags */ |
||||
|
||||
/* Default values for codec-specific fields */ |
||||
sp->quality_level = 75.0f; /* default comp. level */ |
||||
sp->lossless = 0; /* default to false */ |
||||
sp->state = 0; |
||||
sp->nSamples = 0; |
||||
sp->psDecoder = NULL; |
||||
sp->last_y = 0; |
||||
|
||||
sp->buffer_offset = 0; |
||||
sp->pBuffer = NULL; |
||||
|
||||
/*
|
||||
* Install codec methods. |
||||
* Notes: |
||||
* encoderow is not supported |
||||
*/ |
||||
tif->tif_fixuptags = TWebPFixupTags; |
||||
tif->tif_setupdecode = TWebPSetupDecode; |
||||
tif->tif_predecode = TWebPPreDecode; |
||||
tif->tif_decoderow = TWebPDecode; |
||||
tif->tif_decodestrip = TWebPDecode; |
||||
tif->tif_decodetile = TWebPDecode; |
||||
tif->tif_setupencode = TWebPSetupEncode; |
||||
tif->tif_preencode = TWebPPreEncode; |
||||
tif->tif_postencode = TWebPPostEncode; |
||||
tif->tif_encoderow = TWebPEncode; |
||||
tif->tif_encodestrip = TWebPEncode; |
||||
tif->tif_encodetile = TWebPEncode; |
||||
tif->tif_cleanup = TWebPCleanup; |
||||
|
||||
return 1; |
||||
bad: |
||||
TIFFErrorExt(tif->tif_clientdata, module, |
||||
"No space for WebP state block"); |
||||
return 0; |
||||
} |
||||
|
||||
#endif /* WEBP_SUPPORT */ |
@ -0,0 +1,440 @@ |
||||
/*
|
||||
* Copyright (c) 2017, Planet Labs |
||||
* Author: <even.rouault at spatialys.com> |
||||
* |
||||
* Permission to use, copy, modify, distribute, and sell this software and |
||||
* its documentation for any purpose is hereby granted without fee, provided |
||||
* that (i) the above copyright notices and this permission notice appear in |
||||
* all copies of the software and related documentation, and (ii) the names of |
||||
* Sam Leffler and Silicon Graphics may not be used in any advertising or |
||||
* publicity relating to the software without the specific, prior written |
||||
* permission of Sam Leffler and Silicon Graphics. |
||||
* |
||||
* THE SOFTWARE IS PROVIDED "AS-IS" AND WITHOUT WARRANTY OF ANY KIND, |
||||
* EXPRESS, IMPLIED OR OTHERWISE, INCLUDING WITHOUT LIMITATION, ANY |
||||
* WARRANTY OF MERCHANTABILITY OR FITNESS FOR A PARTICULAR PURPOSE. |
||||
* |
||||
* IN NO EVENT SHALL SAM LEFFLER OR SILICON GRAPHICS BE LIABLE FOR |
||||
* ANY SPECIAL, INCIDENTAL, INDIRECT OR CONSEQUENTIAL DAMAGES OF ANY KIND, |
||||
* OR ANY DAMAGES WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, |
||||
* WHETHER OR NOT ADVISED OF THE POSSIBILITY OF DAMAGE, AND ON ANY THEORY OF |
||||
* LIABILITY, ARISING OUT OF OR IN CONNECTION WITH THE USE OR PERFORMANCE |
||||
* OF THIS SOFTWARE. |
||||
*/ |
||||
|
||||
#include "tiffiop.h" |
||||
#ifdef ZSTD_SUPPORT |
||||
/*
|
||||
* TIFF Library. |
||||
* |
||||
* ZSTD Compression Support |
||||
* |
||||
*/ |
||||
|
||||
#include "tif_predict.h" |
||||
#include "zstd.h" |
||||
|
||||
#include <stdio.h> |
||||
|
||||
/*
|
||||
* State block for each open TIFF file using ZSTD compression/decompression. |
||||
*/ |
||||
typedef struct { |
||||
TIFFPredictorState predict; |
||||
ZSTD_DStream* dstream; |
||||
ZSTD_CStream* cstream; |
||||
int compression_level; /* compression level */ |
||||
ZSTD_outBuffer out_buffer; |
||||
int state; /* state flags */ |
||||
#define LSTATE_INIT_DECODE 0x01 |
||||
#define LSTATE_INIT_ENCODE 0x02 |
||||
|
||||
TIFFVGetMethod vgetparent; /* super-class method */ |
||||
TIFFVSetMethod vsetparent; /* super-class method */ |
||||
} ZSTDState; |
||||
|
||||
#define LState(tif) ((ZSTDState*) (tif)->tif_data) |
||||
#define DecoderState(tif) LState(tif) |
||||
#define EncoderState(tif) LState(tif) |
||||
|
||||
static int ZSTDEncode(TIFF* tif, uint8* bp, tmsize_t cc, uint16 s); |
||||
static int ZSTDDecode(TIFF* tif, uint8* op, tmsize_t occ, uint16 s); |
||||
|
||||
static int |
||||
ZSTDFixupTags(TIFF* tif) |
||||
{ |
||||
(void) tif; |
||||
return 1; |
||||
} |
||||
|
||||
static int |
||||
ZSTDSetupDecode(TIFF* tif) |
||||
{ |
||||
ZSTDState* sp = DecoderState(tif); |
||||
|
||||
assert(sp != NULL); |
||||
|
||||
/* if we were last encoding, terminate this mode */ |
||||
if (sp->state & LSTATE_INIT_ENCODE) { |
||||
ZSTD_freeCStream(sp->cstream); |
||||
sp->cstream = NULL; |
||||
sp->state = 0; |
||||
} |
||||
|
||||
sp->state |= LSTATE_INIT_DECODE; |
||||
return 1; |
||||
} |
||||
|
||||
/*
|
||||
* Setup state for decoding a strip. |
||||
*/ |
||||
static int |
||||
ZSTDPreDecode(TIFF* tif, uint16 s) |
||||
{ |
||||
static const char module[] = "ZSTDPreDecode"; |
||||
ZSTDState* sp = DecoderState(tif); |
||||
size_t zstd_ret; |
||||
|
||||
(void) s; |
||||
assert(sp != NULL); |
||||
|
||||
if( (sp->state & LSTATE_INIT_DECODE) == 0 ) |
||||
tif->tif_setupdecode(tif); |
||||
|
||||
if( sp->dstream ) |
||||
{ |
||||
ZSTD_freeDStream(sp->dstream); |
||||
sp->dstream = NULL; |
||||
} |
||||
|
||||
sp->dstream = ZSTD_createDStream(); |
||||
if( sp->dstream == NULL ) { |
||||
TIFFErrorExt(tif->tif_clientdata, module, |
||||
"Cannot allocate decompression stream"); |
||||
return 0; |
||||
} |
||||
zstd_ret = ZSTD_initDStream(sp->dstream); |
||||
if( ZSTD_isError(zstd_ret) ) { |
||||
TIFFErrorExt(tif->tif_clientdata, module, |
||||
"Error in ZSTD_initDStream(): %s", |
||||
ZSTD_getErrorName(zstd_ret)); |
||||
return 0; |
||||
} |
||||
|
||||
return 1; |
||||
} |
||||
|
||||
static int |
||||
ZSTDDecode(TIFF* tif, uint8* op, tmsize_t occ, uint16 s) |
||||
{ |
||||
static const char module[] = "ZSTDDecode"; |
||||
ZSTDState* sp = DecoderState(tif); |
||||
ZSTD_inBuffer in_buffer; |
||||
ZSTD_outBuffer out_buffer; |
||||
size_t zstd_ret; |
||||
|
||||
(void) s; |
||||
assert(sp != NULL); |
||||
assert(sp->state == LSTATE_INIT_DECODE); |
||||
|
||||
in_buffer.src = tif->tif_rawcp; |
||||
in_buffer.size = (size_t) tif->tif_rawcc; |
||||
in_buffer.pos = 0; |
||||
|
||||
out_buffer.dst = op; |
||||
out_buffer.size = (size_t) occ; |
||||
out_buffer.pos = 0; |
||||
|
||||
do { |
||||
zstd_ret = ZSTD_decompressStream(sp->dstream, &out_buffer, |
||||
&in_buffer); |
||||
if( ZSTD_isError(zstd_ret) ) { |
||||
TIFFErrorExt(tif->tif_clientdata, module, |
||||
"Error in ZSTD_decompressStream(): %s", |
||||
ZSTD_getErrorName(zstd_ret)); |
||||
return 0; |
||||
} |
||||
} while( zstd_ret != 0 && |
||||
in_buffer.pos < in_buffer.size && |
||||
out_buffer.pos < out_buffer.size ); |
||||
|
||||
if (out_buffer.pos < (size_t)occ) { |
||||
TIFFErrorExt(tif->tif_clientdata, module, |
||||
"Not enough data at scanline %lu (short %lu bytes)", |
||||
(unsigned long) tif->tif_row, |
||||
(unsigned long) (size_t)occ - out_buffer.pos); |
||||
return 0; |
||||
} |
||||
|
||||
tif->tif_rawcp += in_buffer.pos; |
||||
tif->tif_rawcc -= in_buffer.pos; |
||||
|
||||
return 1; |
||||
} |
||||
|
||||
static int |
||||
ZSTDSetupEncode(TIFF* tif) |
||||
{ |
||||
ZSTDState* sp = EncoderState(tif); |
||||
|
||||
assert(sp != NULL); |
||||
if (sp->state & LSTATE_INIT_DECODE) { |
||||
ZSTD_freeDStream(sp->dstream); |
||||
sp->dstream = NULL; |
||||
sp->state = 0; |
||||
} |
||||
|
||||
sp->state |= LSTATE_INIT_ENCODE; |
||||
return 1; |
||||
} |
||||
|
||||
/*
|
||||
* Reset encoding state at the start of a strip. |
||||
*/ |
||||
static int |
||||
ZSTDPreEncode(TIFF* tif, uint16 s) |
||||
{ |
||||
static const char module[] = "ZSTDPreEncode"; |
||||
ZSTDState *sp = EncoderState(tif); |
||||
size_t zstd_ret; |
||||
|
||||
(void) s; |
||||
assert(sp != NULL); |
||||
if( sp->state != LSTATE_INIT_ENCODE ) |
||||
tif->tif_setupencode(tif); |
||||
|
||||
if (sp->cstream) { |
||||
ZSTD_freeCStream(sp->cstream); |
||||
sp->cstream = NULL; |
||||
} |
||||
sp->cstream = ZSTD_createCStream(); |
||||
if( sp->cstream == NULL ) { |
||||
TIFFErrorExt(tif->tif_clientdata, module, |
||||
"Cannot allocate compression stream"); |
||||
return 0; |
||||
} |
||||
|
||||
zstd_ret = ZSTD_initCStream(sp->cstream, sp->compression_level); |
||||
if( ZSTD_isError(zstd_ret) ) { |
||||
TIFFErrorExt(tif->tif_clientdata, module, |
||||
"Error in ZSTD_initCStream(): %s", |
||||
ZSTD_getErrorName(zstd_ret)); |
||||
return 0; |
||||
} |
||||
|
||||
sp->out_buffer.dst = tif->tif_rawdata; |
||||
sp->out_buffer.size = (size_t)tif->tif_rawdatasize; |
||||
sp->out_buffer.pos = 0; |
||||
|
||||
return 1; |
||||
} |
||||
|
||||
/*
|
||||
* Encode a chunk of pixels. |
||||
*/ |
||||
static int |
||||
ZSTDEncode(TIFF* tif, uint8* bp, tmsize_t cc, uint16 s) |
||||
{ |
||||
static const char module[] = "ZSTDEncode"; |
||||
ZSTDState *sp = EncoderState(tif); |
||||
ZSTD_inBuffer in_buffer; |
||||
size_t zstd_ret; |
||||
|
||||
assert(sp != NULL); |
||||
assert(sp->state == LSTATE_INIT_ENCODE); |
||||
|
||||
(void) s; |
||||
|
||||
in_buffer.src = bp; |
||||
in_buffer.size = (size_t)cc; |
||||
in_buffer.pos = 0; |
||||
|
||||
do { |
||||
zstd_ret = ZSTD_compressStream(sp->cstream, &sp->out_buffer, |
||||
&in_buffer); |
||||
if( ZSTD_isError(zstd_ret) ) { |
||||
TIFFErrorExt(tif->tif_clientdata, module, |
||||
"Error in ZSTD_compressStream(): %s", |
||||
ZSTD_getErrorName(zstd_ret)); |
||||
return 0; |
||||
} |
||||
if( sp->out_buffer.pos == sp->out_buffer.size ) { |
||||
tif->tif_rawcc = tif->tif_rawdatasize; |
||||
TIFFFlushData1(tif); |
||||
sp->out_buffer.dst = tif->tif_rawcp; |
||||
sp->out_buffer.pos = 0; |
||||
} |
||||
} while( in_buffer.pos < in_buffer.size ); |
||||
|
||||
return 1; |
||||
} |
||||
|
||||
/*
|
||||
* Finish off an encoded strip by flushing it. |
||||
*/ |
||||
static int |
||||
ZSTDPostEncode(TIFF* tif) |
||||
{ |
||||
static const char module[] = "ZSTDPostEncode"; |
||||
ZSTDState *sp = EncoderState(tif); |
||||
size_t zstd_ret; |
||||
|
||||
do { |
||||
zstd_ret = ZSTD_endStream(sp->cstream, &sp->out_buffer); |
||||
if( ZSTD_isError(zstd_ret) ) { |
||||
TIFFErrorExt(tif->tif_clientdata, module, |
||||
"Error in ZSTD_endStream(): %s", |
||||
ZSTD_getErrorName(zstd_ret)); |
||||
return 0; |
||||
} |
||||
if( sp->out_buffer.pos > 0 ) { |
||||
tif->tif_rawcc = sp->out_buffer.pos; |
||||
TIFFFlushData1(tif); |
||||
sp->out_buffer.dst = tif->tif_rawcp; |
||||
sp->out_buffer.pos = 0; |
||||
} |
||||
} while (zstd_ret != 0); |
||||
return 1; |
||||
} |
||||
|
||||
static void |
||||
ZSTDCleanup(TIFF* tif) |
||||
{ |
||||
ZSTDState* sp = LState(tif); |
||||
|
||||
assert(sp != 0); |
||||
|
||||
(void)TIFFPredictorCleanup(tif); |
||||
|
||||
tif->tif_tagmethods.vgetfield = sp->vgetparent; |
||||
tif->tif_tagmethods.vsetfield = sp->vsetparent; |
||||
|
||||
if (sp->dstream) { |
||||
ZSTD_freeDStream(sp->dstream); |
||||
sp->dstream = NULL; |
||||
} |
||||
if (sp->cstream) { |
||||
ZSTD_freeCStream(sp->cstream); |
||||
sp->cstream = NULL; |
||||
} |
||||
_TIFFfree(sp); |
||||
tif->tif_data = NULL; |
||||
|
||||
_TIFFSetDefaultCompressionState(tif); |
||||
} |
||||
|
||||
static int |
||||
ZSTDVSetField(TIFF* tif, uint32 tag, va_list ap) |
||||
{ |
||||
static const char module[] = "ZSTDVSetField"; |
||||
ZSTDState* sp = LState(tif); |
||||
|
||||
switch (tag) { |
||||
case TIFFTAG_ZSTD_LEVEL: |
||||
sp->compression_level = (int) va_arg(ap, int); |
||||
if( sp->compression_level <= 0 || |
||||
sp->compression_level > ZSTD_maxCLevel() ) |
||||
{ |
||||
TIFFWarningExt(tif->tif_clientdata, module, |
||||
"ZSTD_LEVEL should be between 1 and %d", |
||||
ZSTD_maxCLevel()); |
||||
} |
||||
return 1; |
||||
default: |
||||
return (*sp->vsetparent)(tif, tag, ap); |
||||
} |
||||
/*NOTREACHED*/ |
||||
} |
||||
|
||||
static int |
||||
ZSTDVGetField(TIFF* tif, uint32 tag, va_list ap) |
||||
{ |
||||
ZSTDState* sp = LState(tif); |
||||
|
||||
switch (tag) { |
||||
case TIFFTAG_ZSTD_LEVEL: |
||||
*va_arg(ap, int*) = sp->compression_level; |
||||
break; |
||||
default: |
||||
return (*sp->vgetparent)(tif, tag, ap); |
||||
} |
||||
return 1; |
||||
} |
||||
|
||||
static const TIFFField ZSTDFields[] = { |
||||
{ TIFFTAG_ZSTD_LEVEL, 0, 0, TIFF_ANY, 0, TIFF_SETGET_INT, |
||||
TIFF_SETGET_UNDEFINED, |
||||
FIELD_PSEUDO, TRUE, FALSE, "ZSTD compression_level", NULL }, |
||||
}; |
||||
|
||||
int |
||||
TIFFInitZSTD(TIFF* tif, int scheme) |
||||
{ |
||||
static const char module[] = "TIFFInitZSTD"; |
||||
ZSTDState* sp; |
||||
|
||||
assert( scheme == COMPRESSION_ZSTD ); |
||||
|
||||
/*
|
||||
* Merge codec-specific tag information. |
||||
*/ |
||||
if (!_TIFFMergeFields(tif, ZSTDFields, TIFFArrayCount(ZSTDFields))) { |
||||
TIFFErrorExt(tif->tif_clientdata, module, |
||||
"Merging ZSTD codec-specific tags failed"); |
||||
return 0; |
||||
} |
||||
|
||||
/*
|
||||
* Allocate state block so tag methods have storage to record values. |
||||
*/ |
||||
tif->tif_data = (uint8*) _TIFFmalloc(sizeof(ZSTDState)); |
||||
if (tif->tif_data == NULL) |
||||
goto bad; |
||||
sp = LState(tif); |
||||
|
||||
/*
|
||||
* Override parent get/set field methods. |
||||
*/ |
||||
sp->vgetparent = tif->tif_tagmethods.vgetfield; |
||||
tif->tif_tagmethods.vgetfield = ZSTDVGetField; /* hook for codec tags */ |
||||
sp->vsetparent = tif->tif_tagmethods.vsetfield; |
||||
tif->tif_tagmethods.vsetfield = ZSTDVSetField; /* hook for codec tags */ |
||||
|
||||
/* Default values for codec-specific fields */ |
||||
sp->compression_level = 9; /* default comp. level */ |
||||
sp->state = 0; |
||||
sp->dstream = 0; |
||||
sp->cstream = 0; |
||||
sp->out_buffer.dst = NULL; |
||||
sp->out_buffer.size = 0; |
||||
sp->out_buffer.pos = 0; |
||||
|
||||
/*
|
||||
* Install codec methods. |
||||
*/ |
||||
tif->tif_fixuptags = ZSTDFixupTags; |
||||
tif->tif_setupdecode = ZSTDSetupDecode; |
||||
tif->tif_predecode = ZSTDPreDecode; |
||||
tif->tif_decoderow = ZSTDDecode; |
||||
tif->tif_decodestrip = ZSTDDecode; |
||||
tif->tif_decodetile = ZSTDDecode; |
||||
tif->tif_setupencode = ZSTDSetupEncode; |
||||
tif->tif_preencode = ZSTDPreEncode; |
||||
tif->tif_postencode = ZSTDPostEncode; |
||||
tif->tif_encoderow = ZSTDEncode; |
||||
tif->tif_encodestrip = ZSTDEncode; |
||||
tif->tif_encodetile = ZSTDEncode; |
||||
tif->tif_cleanup = ZSTDCleanup; |
||||
/*
|
||||
* Setup predictor setup. |
||||
*/ |
||||
(void) TIFFPredictorInit(tif); |
||||
return 1; |
||||
bad: |
||||
TIFFErrorExt(tif->tif_clientdata, module, |
||||
"No space for ZSTD state block"); |
||||
return 0; |
||||
} |
||||
#endif /* ZSTD_SUPPORT */ |
||||
|
||||
/* vim: set ts=8 sts=8 sw=8 noet: */ |
@ -0,0 +1,201 @@ |
||||
// This file is part of OpenCV project.
|
||||
// It is subject to the license terms in the LICENSE file found in the top-level directory
|
||||
// of this distribution and at http://opencv.org/license.html
|
||||
|
||||
#include "precomp.hpp" |
||||
|
||||
namespace cv { |
||||
|
||||
typedef int (*CountNonZeroFunc)(const uchar*, int); |
||||
|
||||
|
||||
CV_CPU_OPTIMIZATION_NAMESPACE_BEGIN |
||||
|
||||
CountNonZeroFunc getCountNonZeroTab(int depth); |
||||
|
||||
|
||||
#ifndef CV_CPU_OPTIMIZATION_DECLARATIONS_ONLY |
||||
|
||||
// Generic scalar fallback: returns how many of the first `len` elements of
// `src` compare unequal to zero. With CV_ENABLE_UNROLLED the bulk of the
// array is handled four elements per iteration.
template<typename T>
static int countNonZero_(const T* src, int len )
{
    int count = 0;
    int idx = 0;
#if CV_ENABLE_UNROLLED
    for( ; idx <= len - 4; idx += 4 )
    {
        count += (src[idx] != 0) + (src[idx+1] != 0) + (src[idx+2] != 0) + (src[idx+3] != 0);
    }
#endif
    // remaining (or, without unrolling, all) elements
    while( idx < len )
    {
        if( src[idx] != 0 )
            count++;
        idx++;
    }
    return count;
}
||||
|
||||
// Counts the non-zero bytes among the first `len` elements of `src`.
// With CV_SIMD the vector part counts *zero* bytes in a three-level
// accumulator cascade (8-bit -> 16-bit -> 32-bit lanes); the innermost loop
// runs at most 255 steps, each adding 0 or 1 per lane, so the 8-bit lane
// counters cannot wrap. The non-zero count of the vector part is then
// "elements processed - zeros". The scalar loop handles the tail.
static int countNonZero8u( const uchar* src, int len )
{
    int pos = 0, nz = 0;
#if CV_SIMD
    const int vecLen = len & -v_uint8::nlanes;   // largest multiple of the vector width
    const v_uint8 zeros = vx_setzero_u8();
    const v_uint8 ones = vx_setall_u8(1);

    v_uint32 zeroCnt32 = vx_setzero_u32();
    while (pos < vecLen)
    {
        const int midEnd = std::min(vecLen, pos + 65280 * v_uint16::nlanes);
        v_uint16 zeroCnt16 = vx_setzero_u16();
        int mid = pos;
        while (mid < midEnd)
        {
            const int innerEnd = std::min(vecLen, mid + 255 * v_uint8::nlanes);
            v_uint8 zeroCnt8 = vx_setzero_u8();
            int inner = mid;
            for (; inner < innerEnd; inner += v_uint8::nlanes)
            {
                // comparison mask is all-ones where the byte is zero; "& ones" turns it into 0/1
                zeroCnt8 += ones & (vx_load(src + inner) == zeros);
            }
            v_uint16 lo16, hi16;
            v_expand(zeroCnt8, lo16, hi16);
            zeroCnt16 += lo16 + hi16;
            mid = inner;
        }
        v_uint32 lo32, hi32;
        v_expand(zeroCnt16, lo32, hi32);
        zeroCnt32 += lo32 + hi32;
        pos = mid;
    }
    nz = pos - v_reduce_sum(zeroCnt32);
    v_cleanup();
#endif
    // scalar tail
    for( ; pos < len; pos++ )
        nz += src[pos] != 0;
    return nz;
}
||||
|
||||
static int countNonZero16u( const ushort* src, int len ) |
||||
{ |
||||
int i = 0, nz = 0; |
||||
#if CV_SIMD |
||||
int len0 = len & -v_int8::nlanes; |
||||
v_uint16 v_zero = vx_setzero_u16(); |
||||
v_int8 v_one = vx_setall_s8(1); |
||||
|
||||
v_int32 v_sum32 = vx_setzero_s32(); |
||||
while (i < len0) |
||||
{ |
||||
v_int16 v_sum16 = vx_setzero_s16(); |
||||
int j = i; |
||||
while (j < std::min(len0, i + 32766 * v_int16::nlanes)) |
||||
{ |
||||
v_int8 v_sum8 = vx_setzero_s8(); |
||||
int k = j; |
||||
for (; k < std::min(len0, j + 127 * v_int8::nlanes); k += v_int8::nlanes) |
||||
v_sum8 += v_one & v_pack(v_reinterpret_as_s16(vx_load(src + k) == v_zero), v_reinterpret_as_s16(vx_load(src + k + v_uint16::nlanes) == v_zero)); |
||||
v_int16 part1, part2; |
||||
v_expand(v_sum8, part1, part2); |
||||
v_sum16 += part1 + part2; |
||||
j = k; |
||||
} |
||||
v_int32 part1, part2; |
||||
v_expand(v_sum16, part1, part2); |
||||
v_sum32 += part1 + part2; |
||||
i = j; |
||||
} |
||||
nz = i - v_reduce_sum(v_sum32); |
||||
v_cleanup(); |
||||
#endif |
||||
return nz + countNonZero_(src + i, len - i); |
||||
} |
||||
|
||||
static int countNonZero32s( const int* src, int len ) |
||||
{ |
||||
int i = 0, nz = 0; |
||||
#if CV_SIMD |
||||
int len0 = len & -v_int8::nlanes; |
||||
v_int32 v_zero = vx_setzero_s32(); |
||||
v_int8 v_one = vx_setall_s8(1); |
||||
|
||||
v_int32 v_sum32 = vx_setzero_s32(); |
||||
while (i < len0) |
||||
{ |
||||
v_int16 v_sum16 = vx_setzero_s16(); |
||||
int j = i; |
||||
while (j < std::min(len0, i + 32766 * v_int16::nlanes)) |
||||
{ |
||||
v_int8 v_sum8 = vx_setzero_s8(); |
||||
int k = j; |
||||
for (; k < std::min(len0, j + 127 * v_int8::nlanes); k += v_int8::nlanes) |
||||
v_sum8 += v_one & v_pack( |
||||
v_pack(vx_load(src + k ) == v_zero, vx_load(src + k + v_int32::nlanes) == v_zero), |
||||
v_pack(vx_load(src + k + 2*v_int32::nlanes) == v_zero, vx_load(src + k + 3*v_int32::nlanes) == v_zero) |
||||
); |
||||
v_int16 part1, part2; |
||||
v_expand(v_sum8, part1, part2); |
||||
v_sum16 += part1 + part2; |
||||
j = k; |
||||
} |
||||
v_int32 part1, part2; |
||||
v_expand(v_sum16, part1, part2); |
||||
v_sum32 += part1 + part2; |
||||
i = j; |
||||
} |
||||
nz = i - v_reduce_sum(v_sum32); |
||||
v_cleanup(); |
||||
#endif |
||||
return nz + countNonZero_(src + i, len - i); |
||||
} |
||||
|
||||
static int countNonZero32f( const float* src, int len ) |
||||
{ |
||||
int i = 0, nz = 0; |
||||
#if CV_SIMD |
||||
int len0 = len & -v_int8::nlanes; |
||||
v_float32 v_zero = vx_setzero_f32(); |
||||
v_int8 v_one = vx_setall_s8(1); |
||||
|
||||
v_int32 v_sum32 = vx_setzero_s32(); |
||||
while (i < len0) |
||||
{ |
||||
v_int16 v_sum16 = vx_setzero_s16(); |
||||
int j = i; |
||||
while (j < std::min(len0, i + 32766 * v_int16::nlanes)) |
||||
{ |
||||
v_int8 v_sum8 = vx_setzero_s8(); |
||||
int k = j; |
||||
for (; k < std::min(len0, j + 127 * v_int8::nlanes); k += v_int8::nlanes) |
||||
v_sum8 += v_one & v_pack( |
||||
v_pack(v_reinterpret_as_s32(vx_load(src + k ) == v_zero), v_reinterpret_as_s32(vx_load(src + k + v_float32::nlanes) == v_zero)), |
||||
v_pack(v_reinterpret_as_s32(vx_load(src + k + 2*v_float32::nlanes) == v_zero), v_reinterpret_as_s32(vx_load(src + k + 3*v_float32::nlanes) == v_zero)) |
||||
); |
||||
v_int16 part1, part2; |
||||
v_expand(v_sum8, part1, part2); |
||||
v_sum16 += part1 + part2; |
||||
j = k; |
||||
} |
||||
v_int32 part1, part2; |
||||
v_expand(v_sum16, part1, part2); |
||||
v_sum32 += part1 + part2; |
||||
i = j; |
||||
} |
||||
nz = i - v_reduce_sum(v_sum32); |
||||
v_cleanup(); |
||||
#endif |
||||
return nz + countNonZero_(src + i, len - i); |
||||
} |
||||
|
||||
static int countNonZero64f( const double* src, int len ) |
||||
{ |
||||
return countNonZero_(src, len); |
||||
} |
||||
|
||||
// Returns the counting routine for the given depth code, or 0 for the last
// (eighth) table slot. `depth` indexes the table directly and is not
// range-checked here. Signed and unsigned 8/16-bit depths share a routine.
CountNonZeroFunc getCountNonZeroTab(int depth)
{
    static CountNonZeroFunc countNonZeroTab[] =
    {
        (CountNonZeroFunc)GET_OPTIMIZED(countNonZero8u),    // index 0
        (CountNonZeroFunc)GET_OPTIMIZED(countNonZero8u),    // index 1
        (CountNonZeroFunc)GET_OPTIMIZED(countNonZero16u),   // index 2
        (CountNonZeroFunc)GET_OPTIMIZED(countNonZero16u),   // index 3
        (CountNonZeroFunc)GET_OPTIMIZED(countNonZero32s),   // index 4
        (CountNonZeroFunc)GET_OPTIMIZED(countNonZero32f),   // index 5
        (CountNonZeroFunc)GET_OPTIMIZED(countNonZero64f),   // index 6
        0                                                   // index 7: no routine
    };

    CountNonZeroFunc fn = countNonZeroTab[depth];
    return fn;
}
||||
|
||||
#endif |
||||
|
||||
CV_CPU_OPTIMIZATION_NAMESPACE_END |
||||
} // namespace
|
File diff suppressed because it is too large
Load Diff
File diff suppressed because it is too large
Load Diff
@ -0,0 +1,245 @@ |
||||
// This file is part of OpenCV project.
|
||||
// It is subject to the license terms in the LICENSE file found in the top-level directory
|
||||
// of this distribution and at http://opencv.org/license.html
|
||||
|
||||
|
||||
#include "precomp.hpp" |
||||
#include "opencl_kernels_core.hpp" |
||||
#include "stat.hpp" |
||||
|
||||
#include "sum.simd.hpp" |
||||
#include "sum.simd_declarations.hpp" // defines CV_CPU_DISPATCH_MODES_ALL=AVX2,...,BASELINE based on CMakeLists.txt content |
||||
|
||||
#undef HAVE_IPP |
||||
#undef CV_IPP_RUN_FAST |
||||
#define CV_IPP_RUN_FAST(f, ...) |
||||
#undef CV_IPP_RUN |
||||
#define CV_IPP_RUN(c, f, ...) |
||||
|
||||
namespace cv |
||||
{ |
||||
|
||||
// Dispatcher: forwards to the getSumFunc() implementation selected by
// CV_CPU_DISPATCH among the modes listed in CV_CPU_DISPATCH_MODES_ALL.
SumFunc getSumFunc(int depth)
{
    CV_INSTRUMENT_REGION();

    CV_CPU_DISPATCH(getSumFunc, (depth), CV_CPU_DISPATCH_MODES_ALL);
}
||||
|
||||
#ifdef HAVE_OPENCL |
||||
|
||||
// OpenCL reduction used for sum / sum of absolute values / sum of squares.
//
// Builds the "reduce" kernel with options derived from the source type, the
// optional mask and the optional second source, runs it with one work-group
// per compute unit, and folds the per-group partial results on the host.
//
//   _src, _src2 - input arrays (_src2 may be empty; if present it must have
//                 the same type as _src)
//   res         - receives the reduced value for the first result
//   sum_op      - OCL_OP_SUM, OCL_OP_SUM_ABS or OCL_OP_SUM_SQR
//   _mask       - optional CV_8UC1 mask
//   calc2, res2 - when calc2 is set, a second result is written to res2
//                 (through a const_cast)
//
// Returns false when the configuration is unsupported (more than 4 channels,
// or CV_64F without device double support), when the kernel cannot be built,
// or when it fails to run; true otherwise.
bool ocl_sum( InputArray _src, Scalar & res, int sum_op, InputArray _mask,
              InputArray _src2, bool calc2, const Scalar & res2 )
{
    CV_Assert(sum_op == OCL_OP_SUM || sum_op == OCL_OP_SUM_ABS || sum_op == OCL_OP_SUM_SQR);

    const ocl::Device & device = ocl::Device::getDefault();
    const bool doubleSupport = device.doubleFPConfig() > 0;
    const bool haveMask = _mask.kind() != _InputArray::NONE;
    const bool haveSrc2 = _src2.kind() != _InputArray::NONE;

    const int type = _src.type();
    const int depth = CV_MAT_DEPTH(type);
    const int cn = CV_MAT_CN(type);
    // vectorised loads are only attempted for single-channel, unmasked input
    const int kercn = (cn == 1 && !haveMask) ? ocl::predictOptimalVectorWidth(_src, _src2) : 1;
    const int mcn = std::max(cn, kercn);
    CV_Assert(!haveSrc2 || _src2.type() == type);
    const int convert_cn = haveSrc2 ? mcn : cn;

    if ( cn > 4 || (depth == CV_64F && !doubleSupport) )
        return false;

    const int ngroups = device.maxComputeUnits();
    const int dbsize = ngroups * (calc2 ? 2 : 1);
    size_t wgs = device.maxWorkGroupSize();

    // accumulator depth: at least 32-bit int (32-bit float for squares)
    const int ddepth = std::max(sum_op == OCL_OP_SUM_SQR ? CV_32F : CV_32S, depth);
    const int dtype = CV_MAKE_TYPE(ddepth, cn);
    CV_Assert(!haveMask || _mask.type() == CV_8UC1);

    // smallest power of two >= wgs, then halved
    int wgs2_aligned = 1;
    for ( ; wgs2_aligned < (int)wgs; wgs2_aligned <<= 1 )
    {
    }
    wgs2_aligned >>= 1;

    static const char * const opMap[3] = { "OP_SUM", "OP_SUM_ABS", "OP_SUM_SQR" };
    char cvt[2][40];
    String opts = format("-D srcT=%s -D srcT1=%s -D dstT=%s -D dstTK=%s -D dstT1=%s -D ddepth=%d -D cn=%d"
                         " -D convertToDT=%s -D %s -D WGS=%d -D WGS2_ALIGNED=%d%s%s%s%s -D kercn=%d%s%s%s -D convertFromU=%s",
                         ocl::typeToStr(CV_MAKE_TYPE(depth, mcn)), ocl::typeToStr(depth),
                         ocl::typeToStr(dtype), ocl::typeToStr(CV_MAKE_TYPE(ddepth, mcn)),
                         ocl::typeToStr(ddepth), ddepth, cn,
                         ocl::convertTypeStr(depth, ddepth, mcn, cvt[0]),
                         opMap[sum_op], (int)wgs, wgs2_aligned,
                         doubleSupport ? " -D DOUBLE_SUPPORT" : "",
                         haveMask ? " -D HAVE_MASK" : "",
                         _src.isContinuous() ? " -D HAVE_SRC_CONT" : "",
                         haveMask && _mask.isContinuous() ? " -D HAVE_MASK_CONT" : "", kercn,
                         haveSrc2 ? " -D HAVE_SRC2" : "", calc2 ? " -D OP_CALC2" : "",
                         haveSrc2 && _src2.isContinuous() ? " -D HAVE_SRC2_CONT" : "",
                         depth <= CV_32S && ddepth == CV_32S ? ocl::convertTypeStr(CV_8U, ddepth, convert_cn, cvt[1]) : "noconvert");

    ocl::Kernel kernel("reduce", ocl::core::reduce_oclsrc, opts);
    if (kernel.empty())
        return false;

    UMat src = _src.getUMat();
    UMat src2 = _src2.getUMat();
    UMat db(1, dbsize, dtype);      // per-group partial results
    UMat mask = _mask.getUMat();

    ocl::KernelArg srcarg = ocl::KernelArg::ReadOnlyNoSize(src);
    ocl::KernelArg dbarg = ocl::KernelArg::PtrWriteOnly(db);
    ocl::KernelArg maskarg = ocl::KernelArg::ReadOnlyNoSize(mask);
    ocl::KernelArg src2arg = ocl::KernelArg::ReadOnlyNoSize(src2);

    // the argument list depends on which optional inputs are present
    if (haveMask && haveSrc2)
        kernel.args(srcarg, src.cols, (int)src.total(), ngroups, dbarg, maskarg, src2arg);
    else if (haveMask)
        kernel.args(srcarg, src.cols, (int)src.total(), ngroups, dbarg, maskarg);
    else if (haveSrc2)
        kernel.args(srcarg, src.cols, (int)src.total(), ngroups, dbarg, src2arg);
    else
        kernel.args(srcarg, src.cols, (int)src.total(), ngroups, dbarg);

    size_t globalsize = ngroups * wgs;
    if (!kernel.run(1, &globalsize, &wgs, true))
        return false;

    // pick the host-side folding routine matching the accumulator depth
    typedef Scalar (*part_sum)(Mat m);
    part_sum funcs[3] = { ocl_part_sum<int>, ocl_part_sum<float>, ocl_part_sum<double> };
    part_sum func = funcs[ddepth - CV_32S];

    Mat mres = db.getMat(ACCESS_READ);
    if (calc2)
        const_cast<Scalar &>(res2) = func(mres.colRange(ngroups, dbsize));

    res = func(mres.colRange(0, ngroups));
    return true;
}
||||
|
||||
#endif |
||||
|
||||
#ifdef HAVE_IPP |
||||
// IPP-backed sum. Handles 1/3/4-channel 8u, 16u, 16s and 32f matrices that
// are either 2-D or continuous (and so can be viewed as rows x cols).
// On success the per-channel sums are written to _res and true is returned;
// in every other case the function returns false and _res is not written.
static bool ipp_sum(Mat &src, Scalar &_res)
{
    CV_INSTRUMENT_REGION_IPP();

#if IPP_VERSION_X100 >= 700
    const int cn = src.channels();
    if (cn > 4)
        return false;

    const size_t total_size = src.total();
    const int rows = src.size[0];
    const int cols = rows ? (int)(total_size/rows) : 0;
    const bool usable2d = src.dims == 2 || (src.isContinuous() && cols > 0 && (size_t)rows*cols == total_size);
    if( !usable2d )
        return false;

    IppiSize sz = { cols, rows };
    const int type = src.type();
    typedef IppStatus (CV_STDCALL* ippiSumFuncHint)(const void*, int, IppiSize, double *, IppHintAlgorithm);
    typedef IppStatus (CV_STDCALL* ippiSumFuncNoHint)(const void*, int, IppiSize, double *);

    // floating-point variants take an extra accuracy hint
    ippiSumFuncHint ippiSumHint =
        type == CV_32FC1 ? (ippiSumFuncHint)ippiSum_32f_C1R :
        type == CV_32FC3 ? (ippiSumFuncHint)ippiSum_32f_C3R :
        type == CV_32FC4 ? (ippiSumFuncHint)ippiSum_32f_C4R :
        0;
    // integer variants
    ippiSumFuncNoHint ippiSum =
        type == CV_8UC1 ? (ippiSumFuncNoHint)ippiSum_8u_C1R :
        type == CV_8UC3 ? (ippiSumFuncNoHint)ippiSum_8u_C3R :
        type == CV_8UC4 ? (ippiSumFuncNoHint)ippiSum_8u_C4R :
        type == CV_16UC1 ? (ippiSumFuncNoHint)ippiSum_16u_C1R :
        type == CV_16UC3 ? (ippiSumFuncNoHint)ippiSum_16u_C3R :
        type == CV_16UC4 ? (ippiSumFuncNoHint)ippiSum_16u_C4R :
        type == CV_16SC1 ? (ippiSumFuncNoHint)ippiSum_16s_C1R :
        type == CV_16SC3 ? (ippiSumFuncNoHint)ippiSum_16s_C3R :
        type == CV_16SC4 ? (ippiSumFuncNoHint)ippiSum_16s_C4R :
        0;
    CV_Assert(!ippiSumHint || !ippiSum);    // at most one of the two may match
    if( !ippiSumHint && !ippiSum )
        return false;

    Ipp64f res[4];
    IppStatus ret = ippiSumHint ?
                    CV_INSTRUMENT_FUN_IPP(ippiSumHint, src.ptr(), (int)src.step[0], sz, res, ippAlgHintAccurate) :
                    CV_INSTRUMENT_FUN_IPP(ippiSum, src.ptr(), (int)src.step[0], sz, res);
    if( ret < 0 )
        return false;

    for( int ch = 0; ch < cn; ch++ )
        _res[ch] = res[ch];
    return true;
#else
    CV_UNUSED(src); CV_UNUSED(_res);
    return false;
#endif
}
||||
#endif |
||||
|
||||
// Returns the per-channel sum of all elements of `_src` (at most 4 channels).
//
// After the optional OpenCL / IPP fast paths, the array is walked plane by
// plane with NAryMatIterator and the depth-specific SumFunc accumulates into
// a buffer. For depths below CV_32S that buffer is an int array which is
// folded into the Scalar result and cleared every so often (and at the very
// end); for the other depths the SumFunc writes straight into the Scalar's
// storage.
Scalar sum(InputArray _src)
{
    CV_INSTRUMENT_REGION();

#if defined HAVE_OPENCL || defined HAVE_IPP
    Scalar _res;
#endif

#ifdef HAVE_OPENCL
    CV_OCL_RUN_(OCL_PERFORMANCE_CHECK(_src.isUMat()) && _src.dims() <= 2,
                ocl_sum(_src, _res, OCL_OP_SUM),
                _res)
#endif

    Mat src = _src.getMat();
    CV_IPP_RUN(IPP_VERSION_X100 >= 700, ipp_sum(src, _res), _res);

    const int cn = src.channels();
    const int depth = src.depth();
    SumFunc func = getSumFunc(depth);
    CV_Assert( cn <= 4 && func != 0 );

    const Mat* arrays[] = {&src, 0};
    uchar* ptrs[1] = {};
    NAryMatIterator it(arrays, ptrs);
    Scalar s;
    const int total = (int)it.size;
    int blockSize = total;
    int intSumBlockSize = 0;
    int count = 0;
    AutoBuffer<int> _buf;
    int* buf = (int*)&s[0];
    size_t esz = 0;
    const bool blockSum = depth < CV_32S;

    if( blockSum )
    {
        // limit how many elements are accumulated in the int buffer
        // before it is folded into the Scalar result
        intSumBlockSize = depth <= CV_8S ? (1 << 23) : (1 << 15);
        blockSize = std::min(blockSize, intSumBlockSize);
        _buf.allocate(cn);
        buf = _buf.data();

        for( int ch = 0; ch < cn; ch++ )
            buf[ch] = 0;
        esz = src.elemSize();
    }

    for( size_t plane = 0; plane < it.nplanes; plane++, ++it )
    {
        for( int offset = 0; offset < total; offset += blockSize )
        {
            const int bsz = std::min(total - offset, blockSize);
            func( ptrs[0], 0, (uchar*)buf, bsz, cn );
            count += bsz;

            if( blockSum )
            {
                const bool lastChunk = plane+1 >= it.nplanes && offset+bsz >= total;
                if( count + blockSize >= intSumBlockSize || lastChunk )
                {
                    for( int ch = 0; ch < cn; ch++ )
                    {
                        s[ch] += buf[ch];
                        buf[ch] = 0;
                    }
                    count = 0;
                }
            }
            // esz stays 0 without block summation, where each plane is
            // processed in a single chunk
            ptrs[0] += bsz*esz;
        }
    }
    return s;
}
||||
|
||||
} // namespace
|
Some files were not shown because too many files have changed in this diff Show More
Loading…
Reference in new issue