mirror of https://github.com/opencv/opencv.git
Open Source Computer Vision Library
https://opencv.org/
You can not select more than 25 topics
Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.
267 lines
7.7 KiB
267 lines
7.7 KiB
/* crc32_braid.c -- compute the CRC-32 of a data stream |
|
* Copyright (C) 1995-2022 Mark Adler |
|
* For conditions of distribution and use, see copyright notice in zlib.h |
|
* |
|
* This interleaved implementation of a CRC makes use of pipelined multiple |
|
* arithmetic-logic units, commonly found in modern CPU cores. It is due to |
|
* Kadatch and Jenkins (2010). See doc/crc-doc.1.0.pdf in this distribution. |
|
*/ |
|
|
|
#include "zbuild.h" |
|
#include "zutil.h" |
|
#include "functable.h" |
|
#include "crc32_braid_p.h" |
|
#include "crc32_braid_tbl.h" |
|
|
|
/* ========================================================================= */ |
|
|
|
const uint32_t * Z_EXPORT PREFIX(get_crc_table)(void) { |
|
return (const uint32_t *)crc_table; |
|
} |
|
|
|
#ifdef ZLIB_COMPAT |
|
unsigned long Z_EXPORT PREFIX(crc32_z)(unsigned long crc, const unsigned char *buf, size_t len) { |
|
if (buf == NULL) return 0; |
|
|
|
return (unsigned long)functable.crc32((uint32_t)crc, buf, len); |
|
} |
|
#else |
|
uint32_t Z_EXPORT PREFIX(crc32_z)(uint32_t crc, const unsigned char *buf, size_t len) { |
|
if (buf == NULL) return 0; |
|
|
|
return functable.crc32(crc, buf, len); |
|
} |
|
#endif |
|
|
|
#ifdef ZLIB_COMPAT |
|
unsigned long Z_EXPORT PREFIX(crc32)(unsigned long crc, const unsigned char *buf, unsigned int len) { |
|
return (unsigned long)PREFIX(crc32_z)((uint32_t)crc, buf, len); |
|
} |
|
#else |
|
uint32_t Z_EXPORT PREFIX(crc32)(uint32_t crc, const unsigned char *buf, uint32_t len) { |
|
return PREFIX(crc32_z)(crc, buf, len); |
|
} |
|
#endif |
|
|
|
/* ========================================================================= */ |
|
|
|
/* |
|
A CRC of a message is computed on N braids of words in the message, where |
|
each word consists of W bytes (4 or 8). If N is 3, for example, then three |
|
running sparse CRCs are calculated respectively on each braid, at these |
|
indices in the array of words: 0, 3, 6, ..., 1, 4, 7, ..., and 2, 5, 8, ... |
|
This is done starting at a word boundary, and continues until as many blocks |
|
of N * W bytes as are available have been processed. The results are combined |
|
into a single CRC at the end. For this code, N must be in the range 1..6 and |
|
W must be 4 or 8. The upper limit on N can be increased if desired by adding |
|
more #if blocks, extending the patterns apparent in the code. In addition, |
|
crc32 tables would need to be regenerated, if the maximum N value is increased. |
|
|
|
N and W are chosen empirically by benchmarking the execution time on a given |
|
processor. The choices for N and W below were based on testing on Intel Kaby |
|
Lake i7, AMD Ryzen 7, ARM Cortex-A57, Sparc64-VII, PowerPC POWER9, and MIPS64 |
|
Octeon II processors. The Intel, AMD, and ARM processors were all fastest |
|
with N=5, W=8. The Sparc, PowerPC, and MIPS64 were all fastest at N=5, W=4. |
|
They were all tested with either gcc or clang, all using the -O3 optimization |
|
level. Your mileage may vary. |
|
*/ |
|
|
|
/* ========================================================================= */ |
|
|
|
#if BYTE_ORDER == LITTLE_ENDIAN |
|
# define ZSWAPWORD(word) (word) |
|
# define BRAID_TABLE crc_braid_table |
|
#elif BYTE_ORDER == BIG_ENDIAN |
|
# if W == 8 |
|
# define ZSWAPWORD(word) ZSWAP64(word) |
|
# elif W == 4 |
|
# define ZSWAPWORD(word) ZSWAP32(word) |
|
# endif |
|
# define BRAID_TABLE crc_braid_big_table |
|
#else |
|
# error "No endian defined" |
|
#endif |
|
#define DO1 c = crc_table[(c ^ *buf++) & 0xff] ^ (c >> 8) |
|
#define DO8 DO1; DO1; DO1; DO1; DO1; DO1; DO1; DO1 |
|
|
|
/* ========================================================================= */ |
|
#ifdef W |
|
/* |
|
Return the CRC of the W bytes in the word_t data, taking the |
|
least-significant byte of the word as the first byte of data, without any pre |
|
or post conditioning. This is used to combine the CRCs of each braid. |
|
*/ |
|
#if BYTE_ORDER == LITTLE_ENDIAN |
|
static uint32_t crc_word(z_word_t data) { |
|
int k; |
|
for (k = 0; k < W; k++) |
|
data = (data >> 8) ^ crc_table[data & 0xff]; |
|
return (uint32_t)data; |
|
} |
|
#elif BYTE_ORDER == BIG_ENDIAN |
|
static z_word_t crc_word(z_word_t data) { |
|
int k; |
|
for (k = 0; k < W; k++) |
|
data = (data << 8) ^ |
|
crc_big_table[(data >> ((W - 1) << 3)) & 0xff]; |
|
return data; |
|
} |
|
#endif /* BYTE_ORDER */ |
|
|
|
#endif /* W */ |
|
|
|
/* ========================================================================= */ |
|
Z_INTERNAL uint32_t PREFIX(crc32_braid)(uint32_t crc, const uint8_t *buf, size_t len) { |
|
Z_REGISTER uint32_t c; |
|
|
|
/* Pre-condition the CRC */ |
|
c = (~crc) & 0xffffffff; |
|
|
|
#ifdef W |
|
/* If provided enough bytes, do a braided CRC calculation. */ |
|
if (len >= N * W + W - 1) { |
|
size_t blks; |
|
z_word_t const *words; |
|
int k; |
|
|
|
/* Compute the CRC up to a z_word_t boundary. */ |
|
while (len && ((uintptr_t)buf & (W - 1)) != 0) { |
|
len--; |
|
DO1; |
|
} |
|
|
|
/* Compute the CRC on as many N z_word_t blocks as are available. */ |
|
blks = len / (N * W); |
|
len -= blks * N * W; |
|
words = (z_word_t const *)buf; |
|
|
|
z_word_t crc0, word0, comb; |
|
#if N > 1 |
|
z_word_t crc1, word1; |
|
#if N > 2 |
|
z_word_t crc2, word2; |
|
#if N > 3 |
|
z_word_t crc3, word3; |
|
#if N > 4 |
|
z_word_t crc4, word4; |
|
#if N > 5 |
|
z_word_t crc5, word5; |
|
#endif |
|
#endif |
|
#endif |
|
#endif |
|
#endif |
|
/* Initialize the CRC for each braid. */ |
|
crc0 = ZSWAPWORD(c); |
|
#if N > 1 |
|
crc1 = 0; |
|
#if N > 2 |
|
crc2 = 0; |
|
#if N > 3 |
|
crc3 = 0; |
|
#if N > 4 |
|
crc4 = 0; |
|
#if N > 5 |
|
crc5 = 0; |
|
#endif |
|
#endif |
|
#endif |
|
#endif |
|
#endif |
|
/* Process the first blks-1 blocks, computing the CRCs on each braid independently. */ |
|
while (--blks) { |
|
/* Load the word for each braid into registers. */ |
|
word0 = crc0 ^ words[0]; |
|
#if N > 1 |
|
word1 = crc1 ^ words[1]; |
|
#if N > 2 |
|
word2 = crc2 ^ words[2]; |
|
#if N > 3 |
|
word3 = crc3 ^ words[3]; |
|
#if N > 4 |
|
word4 = crc4 ^ words[4]; |
|
#if N > 5 |
|
word5 = crc5 ^ words[5]; |
|
#endif |
|
#endif |
|
#endif |
|
#endif |
|
#endif |
|
words += N; |
|
|
|
/* Compute and update the CRC for each word. The loop should get unrolled. */ |
|
crc0 = BRAID_TABLE[0][word0 & 0xff]; |
|
#if N > 1 |
|
crc1 = BRAID_TABLE[0][word1 & 0xff]; |
|
#if N > 2 |
|
crc2 = BRAID_TABLE[0][word2 & 0xff]; |
|
#if N > 3 |
|
crc3 = BRAID_TABLE[0][word3 & 0xff]; |
|
#if N > 4 |
|
crc4 = BRAID_TABLE[0][word4 & 0xff]; |
|
#if N > 5 |
|
crc5 = BRAID_TABLE[0][word5 & 0xff]; |
|
#endif |
|
#endif |
|
#endif |
|
#endif |
|
#endif |
|
for (k = 1; k < W; k++) { |
|
crc0 ^= BRAID_TABLE[k][(word0 >> (k << 3)) & 0xff]; |
|
#if N > 1 |
|
crc1 ^= BRAID_TABLE[k][(word1 >> (k << 3)) & 0xff]; |
|
#if N > 2 |
|
crc2 ^= BRAID_TABLE[k][(word2 >> (k << 3)) & 0xff]; |
|
#if N > 3 |
|
crc3 ^= BRAID_TABLE[k][(word3 >> (k << 3)) & 0xff]; |
|
#if N > 4 |
|
crc4 ^= BRAID_TABLE[k][(word4 >> (k << 3)) & 0xff]; |
|
#if N > 5 |
|
crc5 ^= BRAID_TABLE[k][(word5 >> (k << 3)) & 0xff]; |
|
#endif |
|
#endif |
|
#endif |
|
#endif |
|
#endif |
|
} |
|
} |
|
|
|
/* Process the last block, combining the CRCs of the N braids at the same time. */ |
|
comb = crc_word(crc0 ^ words[0]); |
|
#if N > 1 |
|
comb = crc_word(crc1 ^ words[1] ^ comb); |
|
#if N > 2 |
|
comb = crc_word(crc2 ^ words[2] ^ comb); |
|
#if N > 3 |
|
comb = crc_word(crc3 ^ words[3] ^ comb); |
|
#if N > 4 |
|
comb = crc_word(crc4 ^ words[4] ^ comb); |
|
#if N > 5 |
|
comb = crc_word(crc5 ^ words[5] ^ comb); |
|
#endif |
|
#endif |
|
#endif |
|
#endif |
|
#endif |
|
words += N; |
|
c = ZSWAPWORD(comb); |
|
|
|
/* Update the pointer to the remaining bytes to process. */ |
|
buf = (const unsigned char *)words; |
|
} |
|
|
|
#endif /* W */ |
|
|
|
/* Complete the computation of the CRC on any remaining bytes. */ |
|
while (len >= 8) { |
|
len -= 8; |
|
DO8; |
|
} |
|
while (len) { |
|
len--; |
|
DO1; |
|
} |
|
|
|
/* Return the CRC, post-conditioned. */ |
|
return c ^ 0xffffffff; |
|
}
|
|
|