mirror of https://github.com/opencv/opencv.git
Open Source Computer Vision Library
https://opencv.org/
You can not select more than 25 topics
Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.
650 lines
19 KiB
650 lines
19 KiB
/////////////////////////////////////////////////////////////////////////// |
|
// |
|
// Copyright (c) 2009-2014 DreamWorks Animation LLC. |
|
// |
|
// All rights reserved. |
|
// |
|
// Redistribution and use in source and binary forms, with or without |
|
// modification, are permitted provided that the following conditions are |
|
// met: |
|
// * Redistributions of source code must retain the above copyright |
|
// notice, this list of conditions and the following disclaimer. |
|
// * Redistributions in binary form must reproduce the above |
|
// copyright notice, this list of conditions and the following disclaimer |
|
// in the documentation and/or other materials provided with the |
|
// distribution. |
|
// * Neither the name of DreamWorks Animation nor the names of |
|
// its contributors may be used to endorse or promote products derived |
|
// from this software without specific prior written permission. |
|
// |
|
// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS |
|
// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT |
|
// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR |
|
// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT |
|
// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, |
|
// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT |
|
// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, |
|
// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY |
|
// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT |
|
// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE |
|
// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. |
|
// |
|
/////////////////////////////////////////////////////////////////////////// |
|
|
|
#define OPENEXR_BUILTIN_TABLES |
|
|
|
// |
|
// A program to generate various acceleration lookup tables |
|
// for Imf::DwaCompressor |
|
// |
|
|
|
#include <cstddef> |
|
#include <stdio.h> |
|
#include <stdlib.h> |
|
#include <math.h> |
|
#include <vector> |
|
|
|
#include <OpenEXRConfig.h> |
|
|
|
#ifndef OPENEXR_BUILTIN_TABLES |
|
#ifdef OPENEXR_IMF_HAVE_SYSCONF_NPROCESSORS_ONLN |
|
#include <unistd.h> |
|
#endif |
|
#endif // OPENEXR_BUILTIN_TABLES |
|
|
|
#include <half.h> |
|
#include <IlmThread.h> |
|
#include <IlmThreadSemaphore.h> |
|
#include <ImfIO.h> |
|
#include <ImfXdr.h> |
|
#include "ImfNamespace.h" |
|
|
|
using namespace OPENEXR_IMF_NAMESPACE; |
|
|
|
namespace { |
|
|
|
#ifdef OPENEXR_BUILTIN_TABLES |
|
// Lookup tables with one entry per 16-bit pattern. They start out
// zero-filled and are populated at run time by generateNoop(),
// generateToLinear() and generateToNonlinear() respectively.
static unsigned short dwaCompressorNoOp[65536] = {};
static unsigned short dwaCompressorToLinear[65536] = {};
static unsigned short dwaCompressorToNonlinear[65536] = {};
|
|
|
//static unsigned int closestDataOffset[0x10000] = {}; |
|
//static unsigned short closestData[0x80000] = {}; |
|
#else |
|
|
|
class LutHeaderWorker |
|
{ |
|
public: |
|
class Runner : public ILMTHREAD_NAMESPACE::Thread |
|
{ |
|
public: |
|
Runner(LutHeaderWorker &worker, bool output): |
|
ILMTHREAD_NAMESPACE::Thread(), |
|
_worker(worker), |
|
_output(output) |
|
{ |
|
start(); |
|
} |
|
|
|
virtual ~Runner() |
|
{ |
|
_semaphore.wait(); |
|
} |
|
|
|
virtual void run() |
|
{ |
|
_semaphore.post(); |
|
_worker.run(_output); |
|
} |
|
|
|
private: |
|
LutHeaderWorker &_worker; |
|
bool _output; |
|
ILMTHREAD_NAMESPACE::Semaphore _semaphore; |
|
|
|
}; // class LutHeaderWorker::Runner |
|
|
|
|
|
LutHeaderWorker(size_t startValue, |
|
size_t endValue): |
|
_lastCandidateCount(0), |
|
_startValue(startValue), |
|
_endValue(endValue), |
|
_numElements(0), |
|
_offset(new size_t[numValues()]), |
|
_elements(new unsigned short[1024*1024*2]) |
|
{ |
|
} |
|
|
|
~LutHeaderWorker() |
|
{ |
|
delete[] _offset; |
|
delete[] _elements; |
|
} |
|
|
|
size_t lastCandidateCount() const |
|
{ |
|
return _lastCandidateCount; |
|
} |
|
|
|
size_t numValues() const |
|
{ |
|
return _endValue - _startValue; |
|
} |
|
|
|
size_t numElements() const |
|
{ |
|
return _numElements; |
|
} |
|
|
|
const size_t* offset() const |
|
{ |
|
return _offset; |
|
} |
|
|
|
const unsigned short* elements() const |
|
{ |
|
return _elements; |
|
} |
|
|
|
void run(bool outputProgress) |
|
{ |
|
half candidate[16]; |
|
int candidateCount = 0; |
|
|
|
for (size_t input=_startValue; input<_endValue; ++input) { |
|
|
|
if (outputProgress) { |
|
#ifdef __GNUC__ |
|
if (input % 100 == 0) { |
|
fprintf(stderr, |
|
" Building acceleration for DwaCompressor, %.2f %% %c", |
|
100.*(float)input/(float)numValues(), 13); |
|
} |
|
#else |
|
if (input % 1000 == 0) { |
|
fprintf(stderr, |
|
" Building acceleration for DwaCompressor, %.2f %%\n", |
|
100.*(float)input/(float)numValues()); |
|
} |
|
#endif |
|
} |
|
|
|
|
|
int numSetBits = countSetBits(input); |
|
half inputHalf, closestHalf; |
|
|
|
inputHalf.setBits(input); |
|
|
|
_offset[input - _startValue] = _numElements; |
|
|
|
// Gather candidates |
|
candidateCount = 0; |
|
for (int targetNumSetBits=numSetBits-1; targetNumSetBits>=0; |
|
--targetNumSetBits) { |
|
bool valueFound = false; |
|
|
|
for (int i=0; i<65536; ++i) { |
|
if (countSetBits(i) != targetNumSetBits) continue; |
|
|
|
if (!valueFound) { |
|
closestHalf.setBits(i); |
|
valueFound = true; |
|
} else { |
|
half tmpHalf; |
|
|
|
tmpHalf.setBits(i); |
|
|
|
if (fabs((float)inputHalf - (float)tmpHalf) < |
|
fabs((float)inputHalf - (float)closestHalf)) { |
|
closestHalf = tmpHalf; |
|
} |
|
} |
|
} |
|
|
|
if (valueFound == false) { |
|
fprintf(stderr, "bork bork bork!\n"); |
|
} |
|
|
|
candidate[candidateCount] = closestHalf; |
|
candidateCount++; |
|
} |
|
|
|
// Sort candidates by increasing number of bits set |
|
for (int i=0; i<candidateCount; ++i) { |
|
for (int j=i+1; j<candidateCount; ++j) { |
|
|
|
int iCnt = countSetBits(candidate[i].bits()); |
|
int jCnt = countSetBits(candidate[j].bits()); |
|
|
|
if (jCnt < iCnt) { |
|
half tmp = candidate[i]; |
|
candidate[i] = candidate[j]; |
|
candidate[j] = tmp; |
|
} |
|
} |
|
} |
|
|
|
// Copy candidates to the data buffer; |
|
for (int i=0; i<candidateCount; ++i) { |
|
_elements[_numElements] = candidate[i].bits(); |
|
_numElements++; |
|
} |
|
|
|
if (input == _endValue-1) { |
|
_lastCandidateCount = candidateCount; |
|
} |
|
} |
|
} |
|
|
|
|
|
private: |
|
size_t _lastCandidateCount; |
|
size_t _startValue; |
|
size_t _endValue; |
|
size_t _numElements; |
|
size_t *_offset; |
|
unsigned short *_elements; |
|
|
|
// |
|
// Precomputing the bit count runs faster than using |
|
// the builtin instruction, at least in one case.. |
|
// |
|
// Precomputing 8-bits is no slower than 16-bits, |
|
// and saves a fair bit of overhead.. |
|
// |
|
int countSetBits(unsigned short src) |
|
{ |
|
static const unsigned short numBitsSet[256] = |
|
{ |
|
0, 1, 1, 2, 1, 2, 2, 3, 1, 2, 2, 3, 2, 3, 3, 4, |
|
1, 2, 2, 3, 2, 3, 3, 4, 2, 3, 3, 4, 3, 4, 4, 5, |
|
1, 2, 2, 3, 2, 3, 3, 4, 2, 3, 3, 4, 3, 4, 4, 5, |
|
2, 3, 3, 4, 3, 4, 4, 5, 3, 4, 4, 5, 4, 5, 5, 6, |
|
1, 2, 2, 3, 2, 3, 3, 4, 2, 3, 3, 4, 3, 4, 4, 5, |
|
2, 3, 3, 4, 3, 4, 4, 5, 3, 4, 4, 5, 4, 5, 5, 6, |
|
2, 3, 3, 4, 3, 4, 4, 5, 3, 4, 4, 5, 4, 5, 5, 6, |
|
3, 4, 4, 5, 4, 5, 5, 6, 4, 5, 5, 6, 5, 6, 6, 7, |
|
1, 2, 2, 3, 2, 3, 3, 4, 2, 3, 3, 4, 3, 4, 4, 5, |
|
2, 3, 3, 4, 3, 4, 4, 5, 3, 4, 4, 5, 4, 5, 5, 6, |
|
2, 3, 3, 4, 3, 4, 4, 5, 3, 4, 4, 5, 4, 5, 5, 6, |
|
3, 4, 4, 5, 4, 5, 5, 6, 4, 5, 5, 6, 5, 6, 6, 7, |
|
2, 3, 3, 4, 3, 4, 4, 5, 3, 4, 4, 5, 4, 5, 5, 6, |
|
3, 4, 4, 5, 4, 5, 5, 6, 4, 5, 5, 6, 5, 6, 6, 7, |
|
3, 4, 4, 5, 4, 5, 5, 6, 4, 5, 5, 6, 5, 6, 6, 7, |
|
4, 5, 5, 6, 5, 6, 6, 7, 5, 6, 6, 7, 6, 7, 7, 8 |
|
}; |
|
|
|
return numBitsSet[src & 0xff] + numBitsSet[src >> 8]; |
|
} |
|
|
|
}; // class LutHeaderWorker |
|
|
|
#endif // OPENEXR_BUILTIN_TABLES |
|
|
|
} // namespace |
|
|
|
|
|
// |
|
// Generate a no-op LUT, to cut down on conditional branches
|
// |
|
static void |
|
generateNoop() |
|
{ |
|
#ifndef OPENEXR_BUILTIN_TABLES |
|
printf("const unsigned short dwaCompressorNoOp[] = \n"); |
|
printf("{"); |
|
#endif // OPENEXR_BUILTIN_TABLES |
|
for (int i=0; i<65536; ++i) { |
|
#ifndef OPENEXR_BUILTIN_TABLES |
|
if (i % 8 == 0) { |
|
printf("\n "); |
|
} |
|
#endif // OPENEXR_BUILTIN_TABLES |
|
unsigned short dst; |
|
char *tmp = (char *)(&dst); |
|
|
|
unsigned short src = (unsigned short)i; |
|
Xdr::write <CharPtrIO> (tmp, src); |
|
#ifndef OPENEXR_BUILTIN_TABLES |
|
printf("0x%04x, ", dst); |
|
#else |
|
dwaCompressorNoOp[i] = dst; |
|
#endif // OPENEXR_BUILTIN_TABLES |
|
} |
|
#ifndef OPENEXR_BUILTIN_TABLES |
|
printf("\n};\n"); |
|
#endif // OPENEXR_BUILTIN_TABLES |
|
} |
|
|
|
// |
|
// Nonlinearly encode luminance. For values below 1.0, we want |
|
// to use a gamma 2.2 function to match what is fairly common |
|
// for storing output referred. However, > 1, gamma functions blow up, |
|
// and log functions are much better behaved. We could use a log |
|
// function everywhere, but it tends to over-sample dark |
|
// regions and undersample the brighter regions, when |
|
// compared to the way real devices reproduce values. |
|
// |
|
// So, above 1, use a log function which is a smooth blend |
|
// into the gamma function. |
|
// |
|
// Nonlinear(linear) = |
|
// |
|
// linear^(1./2.2) / linear <= 1.0 |
|
// | |
|
// ln(linear)/ln(e^2.2) + 1 \ otherwise |
|
// |
|
// |
|
// toNonlinear[] needs to take in XDR format half float values, |
|
// and output NATIVE format float. |
|
// |
|
// toLinear[] does the opposite - takes in NATIVE half and |
|
// outputs XDR half values. |
|
// |
|
|
|
static void |
|
generateToLinear() |
|
{ |
|
#ifndef OPENEXR_BUILTIN_TABLES |
|
unsigned short toLinear[65536]; |
|
#else |
|
unsigned short* toLinear = dwaCompressorToLinear; |
|
#endif // OPENEXR_BUILTIN_TABLES |
|
|
|
toLinear[0] = 0; |
|
|
|
for (int i=1; i<65536; ++i) { |
|
half h; |
|
float sign = 1; |
|
float logBase = pow(2.7182818, 2.2); |
|
|
|
// map NaN and inf to 0 |
|
if ((i & 0x7c00) == 0x7c00) { |
|
toLinear[i] = 0; |
|
continue; |
|
} |
|
|
|
// |
|
// _toLinear - assume i is NATIVE, but our output needs |
|
// to get flipped to XDR |
|
// |
|
h.setBits(i); |
|
sign = 1; |
|
if ((float)h < 0) { |
|
sign = -1; |
|
} |
|
|
|
if ( fabs( (float)h) <= 1.0 ) { |
|
h = (half)(sign * pow((float)fabs((float)h), 2.2f)); |
|
} else { |
|
h = (half)(sign * pow(logBase, (float)(fabs((float)h) - 1.0))); |
|
} |
|
|
|
{ |
|
char *tmp = (char *)(&toLinear[i]); |
|
|
|
Xdr::write <CharPtrIO> ( tmp, h.bits()); |
|
} |
|
} |
|
#ifndef OPENEXR_BUILTIN_TABLES |
|
printf("const unsigned short dwaCompressorToLinear[] = \n"); |
|
printf("{"); |
|
for (int i=0; i<65536; ++i) { |
|
if (i % 8 == 0) { |
|
printf("\n "); |
|
} |
|
printf("0x%04x, ", toLinear[i]); |
|
} |
|
printf("\n};\n"); |
|
#endif // OPENEXR_BUILTIN_TABLES |
|
} |
|
|
|
|
|
static void |
|
generateToNonlinear() |
|
{ |
|
#ifndef OPENEXR_BUILTIN_TABLES |
|
unsigned short toNonlinear[65536]; |
|
#else |
|
unsigned short* toNonlinear = dwaCompressorToNonlinear; |
|
#endif // OPENEXR_BUILTIN_TABLES |
|
|
|
toNonlinear[0] = 0; |
|
|
|
for (int i=1; i<65536; ++i) { |
|
unsigned short usNative, usXdr; |
|
half h; |
|
float sign = 1; |
|
float logBase = pow(2.7182818, 2.2); |
|
|
|
usXdr = i; |
|
|
|
{ |
|
const char *tmp = (char *)(&usXdr); |
|
|
|
Xdr::read<CharPtrIO>(tmp, usNative); |
|
} |
|
|
|
// map NaN and inf to 0 |
|
if ((usNative & 0x7c00) == 0x7c00) { |
|
toNonlinear[i] = 0; |
|
continue; |
|
} |
|
|
|
// |
|
// toNonlinear - assume i is XDR |
|
// |
|
h.setBits(usNative); |
|
sign = 1; |
|
if ((float)h < 0) { |
|
sign = -1; |
|
} |
|
|
|
if ( fabs( (float)h ) <= 1.0) { |
|
h = (half)(sign * pow(fabs((float)h), 1.f/2.2f)); |
|
} else { |
|
h = (half)(sign * ( log(fabs((float)h)) / log(logBase) + 1.0) ); |
|
} |
|
toNonlinear[i] = h.bits(); |
|
} |
|
#ifndef OPENEXR_BUILTIN_TABLES |
|
printf("const unsigned short dwaCompressorToNonlinear[] = \n"); |
|
printf("{"); |
|
for (int i=0; i<65536; ++i) { |
|
if (i % 8 == 0) { |
|
printf("\n "); |
|
} |
|
printf("0x%04x, ", toNonlinear[i]); |
|
} |
|
printf("\n};\n"); |
|
#endif // OPENEXR_BUILTIN_TABLES |
|
} |
|
|
|
|
|
#ifndef OPENEXR_BUILTIN_TABLES |
|
// |
|
// Attempt to get available CPUs in a somewhat portable way. |
|
// |
|
|
|
int |
|
cpuCount() |
|
{ |
|
if (!ILMTHREAD_NAMESPACE::supportsThreads()) return 1; |
|
|
|
int cpuCount = 1; |
|
|
|
#if defined (OPENEXR_IMF_HAVE_SYSCONF_NPROCESSORS_ONLN) |
|
|
|
cpuCount = sysconf(_SC_NPROCESSORS_ONLN); |
|
|
|
#elif defined (_WIN32) |
|
|
|
SYSTEM_INFO sysinfo; |
|
GetSystemInfo( &sysinfo ); |
|
cpuCount = sysinfo.dwNumberOfProcessors; |
|
|
|
#endif |
|
|
|
if (cpuCount < 1) cpuCount = 1; |
|
return cpuCount; |
|
} |
|
|
|
// |
|
// Generate acceleration luts for the quantization. |
|
// |
|
// For each possible input value, we want to find the closest numbers |
|
// which have one fewer bits set than before. |
|
// |
|
// This gives us num_bits(input)-1 values per input. If we alloc |
|
// space for everything, that's like a 2MB table. We can do better |
|
// by compressing all the values to be contiguous and using offset
|
// pointers. |
|
// |
|
// After we've found the candidates with fewer bits set, sort them |
|
// based on increasing numbers of bits set. This way, on quantize(), |
|
// we can scan through the list and halt once we find the first |
|
// candidate within the error range. For small values that can |
|
// be quantized to 0, 0 is the first value tested and the search |
|
// can exit fairly quickly. |
|
// |
|
|
|
void |
|
generateLutHeader() |
|
{ |
|
std::vector<LutHeaderWorker*> workers; |
|
|
|
size_t numWorkers = cpuCount(); |
|
size_t workerInterval = 65536 / numWorkers; |
|
|
|
for (size_t i=0; i<numWorkers; ++i) { |
|
if (i != numWorkers-1) { |
|
workers.push_back( new LutHeaderWorker( i *workerInterval, |
|
(i+1)*workerInterval) ); |
|
} else { |
|
workers.push_back( new LutHeaderWorker(i*workerInterval, 65536) ); |
|
} |
|
} |
|
|
|
if (ILMTHREAD_NAMESPACE::supportsThreads()) { |
|
std::vector<LutHeaderWorker::Runner*> runners; |
|
for (size_t i=0; i<workers.size(); ++i) { |
|
runners.push_back( new LutHeaderWorker::Runner(*workers[i], (i==0)) ); |
|
} |
|
|
|
for (size_t i=0; i<workers.size(); ++i) { |
|
delete runners[i]; |
|
} |
|
} else { |
|
for (size_t i=0; i<workers.size(); ++i) { |
|
workers[i]->run(i == 0); |
|
} |
|
} |
|
|
|
printf("static unsigned int closestDataOffset[] = {\n"); |
|
int offsetIdx = 0; |
|
int offsetPrev = 0; |
|
for (size_t i=0; i<workers.size(); ++i) { |
|
for (size_t value=0; value<workers[i]->numValues(); ++value) { |
|
if (offsetIdx % 8 == 0) { |
|
printf(" "); |
|
} |
|
printf("%6lu, ", workers[i]->offset()[value] + offsetPrev); |
|
if (offsetIdx % 8 == 7) { |
|
printf("\n"); |
|
} |
|
offsetIdx++; |
|
} |
|
offsetPrev += workers[i]->offset()[workers[i]->numValues()-1] + |
|
workers[i]->lastCandidateCount(); |
|
} |
|
printf("};\n\n\n"); |
|
|
|
|
|
printf("static unsigned short closestData[] = {\n"); |
|
int elementIdx = 0; |
|
for (size_t i=0; i<workers.size(); ++i) { |
|
for (size_t element=0; element<workers[i]->numElements(); ++element) { |
|
if (elementIdx % 8 == 0) { |
|
printf(" "); |
|
} |
|
printf("%5d, ", workers[i]->elements()[element]); |
|
if (elementIdx % 8 == 7) { |
|
printf("\n"); |
|
} |
|
elementIdx++; |
|
} |
|
} |
|
printf("};\n\n\n"); |
|
|
|
for (size_t i=0; i<workers.size(); ++i) { |
|
delete workers[i]; |
|
} |
|
} |
|
|
|
|
|
int |
|
main(int argc, char **argv) |
|
{ |
|
printf("#include <cstddef>\n"); |
|
printf("\n\n\n"); |
|
|
|
generateNoop(); |
|
|
|
printf("\n\n\n"); |
|
|
|
generateToLinear(); |
|
|
|
printf("\n\n\n"); |
|
|
|
generateToNonlinear(); |
|
|
|
printf("\n\n\n"); |
|
|
|
generateLutHeader(); |
|
|
|
return 0; |
|
} |
|
#else // OPENEXR_BUILTIN_TABLES |
|
|
|
#include "dwaLookups.h" |
|
|
|
OPENEXR_IMF_INTERNAL_NAMESPACE_SOURCE_ENTER |
|
|
|
static void init_dwa_() |
|
{ |
|
generateNoop(); |
|
generateToLinear(); |
|
generateToNonlinear(); |
|
// N/A: generateLutHeader(); |
|
} |
|
|
|
static inline void init_dwa() |
|
{ |
|
static bool initialized = false; |
|
if (!initialized) |
|
{ |
|
init_dwa_(); |
|
initialized = true; |
|
} |
|
} |
|
|
|
const unsigned short* get_dwaCompressorNoOp() |
|
{ |
|
init_dwa(); |
|
return dwaCompressorNoOp; |
|
} |
|
const unsigned short* get_dwaCompressorToLinear() |
|
{ |
|
init_dwa(); |
|
return dwaCompressorToLinear; |
|
} |
|
const unsigned short* get_dwaCompressorToNonlinear() |
|
{ |
|
init_dwa(); |
|
return dwaCompressorToNonlinear; |
|
} |
|
|
|
OPENEXR_IMF_INTERNAL_NAMESPACE_SOURCE_EXIT |
|
|
|
#endif // OPENEXR_BUILTIN_TABLES
|
|
|