@ -617,6 +617,118 @@ const z_crc_t FAR * ZEXPORT get_crc_table()
return ( const z_crc_t FAR * ) crc_table ;
}
/* =========================================================================
* Use ARM machine instructions if requested. This will compute the CRC about
* ten times faster than the braided calculation. This code does not check for
* the presence of the CRC instruction. Compile with care.
*/
# if defined(Z_ARM_CRC32) && defined(__aarch64__) && W == 8
/*
  Tuning constants for the three-way interleaved CRC loops in crc32_z() below.
  The values were determined empirically to maximize speed, from measurements
  on a Cortex-A57. Your mileage may vary.
 */
/* Number of 64-bit words that each of the three interleaved CRCs consumes in
   one full batch, so a full batch covers 3 * Z_BATCH words. */
# define Z_BATCH 3990
/* Factor passed to multmodp() to combine the three CRCs of a full batch.
   Computed from Z_BATCH = 3990 and must be recomputed if Z_BATCH changes
   (presumably equal to x2nmodp(Z_BATCH, 6), which is how the factor for the
   final, smaller batch is obtained -- confirm before relying on this). */
# define Z_BATCH_ZEROS 0xa10d3d0c
/* Fewest words per interleaved CRC for which one final, smaller batch is still
   attempted; below this the remaining words are processed one at a time. */
# define Z_BATCH_MIN 800
/*
   Update a running CRC-32 with the bytes buf[0..len-1] using the AArch64
   crc32b and crc32x instructions, and return the updated CRC.

   crc is the CRC of the data processed so far. Only its low 32 bits are
   significant; any higher bits are discarded. If buf is Z_NULL, the required
   initial CRC value (zero) is returned and crc and len are ignored.

   Bytes are consumed one at a time until buf is 8-byte aligned, then as 64-bit
   words (three independent CRCs interleaved when enough words remain), then
   one at a time again for the trailing bytes.

   This code does not check that the CPU actually provides the CRC
   instructions.
 */
unsigned long ZEXPORT crc32_z(crc, buf, len)
    unsigned long crc;
    const unsigned char FAR *buf;
    z_size_t len;
{
    z_crc_t val;
    z_word_t crc1, crc2;
    const z_word_t *word;
    z_word_t val0, val1, val2;
    z_size_t last, last2, i;
    z_size_t num;

    /* Return initial CRC, if requested. */
    if (buf == Z_NULL) return 0;

#ifdef DYNAMIC_CRC_TABLE
    once(&made, make_crc_table);
#endif /* DYNAMIC_CRC_TABLE */

    /* Pre-condition the CRC. unsigned long may be wider than 32 bits, so mask
       the value: stray upper bits in the caller's crc must never reach the
       returned value (notably when len is 0 and crc would otherwise pass
       straight through). */
    crc = (~crc) & 0xffffffff;

    /* Compute the CRC up to a word boundary. */
    while (len && ((z_size_t)buf & 7) != 0) {
        len--;
        val = *buf++;
        __asm__ volatile("crc32b %w0, %w0, %w1" : "+r"(crc) : "r"(val));
    }

    /* Prepare to compute the CRC on full 64-bit words word[0..num-1]. The
       low three bits of len are the bytes left over after the last word. */
    word = (z_word_t const *)buf;
    num = len >> 3;
    len &= 7;

    /* Do three interleaved CRCs to realize the throughput of one crc32x
       instruction per cycle. Each CRC is calculated on Z_BATCH words. The
       three CRCs are combined into a single CRC after each set of batches. */
    while (num >= 3 * Z_BATCH) {
        /* The second and third CRCs start from zero; only the first carries
           the running value. */
        crc1 = 0;
        crc2 = 0;
        for (i = 0; i < Z_BATCH; i++) {
            val0 = word[i];
            val1 = word[i + Z_BATCH];
            val2 = word[i + 2 * Z_BATCH];
            __asm__ volatile("crc32x %w0, %w0, %x1" : "+r"(crc) : "r"(val0));
            __asm__ volatile("crc32x %w0, %w0, %x1" : "+r"(crc1) : "r"(val1));
            __asm__ volatile("crc32x %w0, %w0, %x1" : "+r"(crc2) : "r"(val2));
        }
        word += 3 * Z_BATCH;
        num -= 3 * Z_BATCH;

        /* Fold the three partial CRCs together in data order: scale the
           running CRC by the per-batch factor, then mix in the next one. */
        crc = multmodp(Z_BATCH_ZEROS, crc) ^ crc1;
        crc = multmodp(Z_BATCH_ZEROS, crc) ^ crc2;
    }

    /* Do one last smaller batch with the remaining words, if there are enough
       to pay for the combination of CRCs. */
    last = num / 3;
    if (last >= Z_BATCH_MIN) {
        last2 = last << 1;
        crc1 = 0;
        crc2 = 0;
        for (i = 0; i < last; i++) {
            val0 = word[i];
            val1 = word[i + last];
            val2 = word[i + last2];
            __asm__ volatile("crc32x %w0, %w0, %x1" : "+r"(crc) : "r"(val0));
            __asm__ volatile("crc32x %w0, %w0, %x1" : "+r"(crc1) : "r"(val1));
            __asm__ volatile("crc32x %w0, %w0, %x1" : "+r"(crc2) : "r"(val2));
        }
        word += 3 * last;
        num -= 3 * last;

        /* The batch length is not a compile-time constant here, so the
           combination factor is computed for this particular length. */
        val = x2nmodp(last, 6);
        crc = multmodp(val, crc) ^ crc1;
        crc = multmodp(val, crc) ^ crc2;
    }

    /* Compute the CRC on any remaining words. */
    for (i = 0; i < num; i++) {
        val0 = word[i];
        __asm__ volatile("crc32x %w0, %w0, %x1" : "+r"(crc) : "r"(val0));
    }
    word += num;

    /* Complete the CRC on any remaining bytes. */
    buf = (const unsigned char FAR *)word;
    while (len) {
        len--;
        val = *buf++;
        __asm__ volatile("crc32b %w0, %w0, %w1" : "+r"(crc) : "r"(val));
    }

    /* Return the CRC, post-conditioned. */
    return crc ^ 0xffffffff;
}
# else
/* ========================================================================= */
unsigned long ZEXPORT crc32_z ( crc , buf , len )
unsigned long crc ;
@ -939,6 +1051,8 @@ unsigned long ZEXPORT crc32_z(crc, buf, len)
return crc ^ 0xffffffff ;
}
# endif
/* ========================================================================= */
unsigned long ZEXPORT crc32 ( crc , buf , len )
unsigned long crc ;