mirror of https://github.com/opencv/opencv.git
Merge pull request #26113 from FantasqueX:zlib-ng-2-2-1
Update zlib-ng to 2.2.1 #26113 Release: https://github.com/zlib-ng/zlib-ng/releases/tag/2.2.1 ARM diagnostics patch: https://github.com/zlib-ng/zlib-ng/pull/1774 ### Pull Request Readiness Checklist See details at https://github.com/opencv/opencv/wiki/How_to_contribute#making-a-good-pull-request - [x] I agree to contribute to the project under Apache 2 License. - [x] To the best of my knowledge, the proposed patch is not based on a code under GPL or another license that is incompatible with OpenCV - [x] The PR is proposed to the proper branch - [ ] There is a reference to the original bug report and related work - [ ] There is accuracy test, performance test and test data in opencv_extra repository, if applicable Patch to opencv_extra has the same branch name. - [ ] The feature is well documented and sample code can be built with the project CMakepull/26146/head
parent
7de3a8e960
commit
85923c8f30
132 changed files with 7213 additions and 1750 deletions
File diff suppressed because it is too large
Load Diff
@ -1,11 +0,0 @@ |
||||
/* adler32_fold.h -- adler32 folding interface
|
||||
* Copyright (C) 2022 Adam Stylinski |
||||
* For conditions of distribution and use, see copyright notice in zlib.h |
||||
*/ |
||||
|
||||
#ifndef ADLER32_FOLD_H_ |
||||
#define ADLER32_FOLD_H_ |
||||
|
||||
Z_INTERNAL uint32_t adler32_fold_copy_c(uint32_t adler, uint8_t *dst, const uint8_t *src, size_t len); |
||||
|
||||
#endif |
@ -1,2 +0,0 @@ |
||||
# ignore Makefiles; they're all automatically generated |
||||
Makefile |
@ -0,0 +1,65 @@ |
||||
/* arm_functions.h -- ARM implementations for arch-specific functions.
|
||||
* For conditions of distribution and use, see copyright notice in zlib.h |
||||
*/ |
||||
|
||||
#ifndef ARM_FUNCTIONS_H_ |
||||
#define ARM_FUNCTIONS_H_ |
||||
|
||||
#ifdef ARM_NEON |
||||
uint32_t adler32_neon(uint32_t adler, const uint8_t *buf, size_t len); |
||||
uint32_t chunksize_neon(void); |
||||
uint8_t* chunkmemset_safe_neon(uint8_t *out, unsigned dist, unsigned len, unsigned left); |
||||
|
||||
# ifdef HAVE_BUILTIN_CTZLL |
||||
uint32_t compare256_neon(const uint8_t *src0, const uint8_t *src1); |
||||
uint32_t longest_match_neon(deflate_state *const s, Pos cur_match); |
||||
uint32_t longest_match_slow_neon(deflate_state *const s, Pos cur_match); |
||||
# endif |
||||
void slide_hash_neon(deflate_state *s); |
||||
void inflate_fast_neon(PREFIX3(stream) *strm, uint32_t start); |
||||
#endif |
||||
|
||||
#ifdef ARM_ACLE |
||||
uint32_t crc32_acle(uint32_t crc, const uint8_t *buf, size_t len); |
||||
#endif |
||||
|
||||
#ifdef ARM_SIMD |
||||
void slide_hash_armv6(deflate_state *s); |
||||
#endif |
||||
|
||||
|
||||
#ifdef DISABLE_RUNTIME_CPU_DETECTION |
||||
// ARM - SIMD
|
||||
# if (defined(ARM_SIMD) && defined(__ARM_FEATURE_SIMD32)) || defined(ARM_NOCHECK_SIMD) |
||||
# undef native_slide_hash |
||||
# define native_slide_hash slide_hash_armv6 |
||||
# endif |
||||
// ARM - NEON
|
||||
# if (defined(ARM_NEON) && (defined(__ARM_NEON__) || defined(__ARM_NEON))) || ARM_NOCHECK_NEON |
||||
# undef native_adler32 |
||||
# define native_adler32 adler32_neon |
||||
# undef native_chunkmemset_safe |
||||
# define native_chunkmemset_safe chunkmemset_safe_neon |
||||
# undef native_chunksize |
||||
# define native_chunksize chunksize_neon |
||||
# undef native_inflate_fast |
||||
# define native_inflate_fast inflate_fast_neon |
||||
# undef native_slide_hash |
||||
# define native_slide_hash slide_hash_neon |
||||
# ifdef HAVE_BUILTIN_CTZLL |
||||
# undef native_compare256 |
||||
# define native_compare256 compare256_neon |
||||
# undef native_longest_match |
||||
# define native_longest_match longest_match_neon |
||||
# undef native_longest_match_slow |
||||
# define native_longest_match_slow longest_match_slow_neon |
||||
# endif |
||||
# endif |
||||
// ARM - ACLE
|
||||
# if defined(ARM_ACLE) && defined(__ARM_ACLE) && defined(__ARM_FEATURE_CRC32) |
||||
# undef native_crc32 |
||||
# define native_crc32 crc32_acle |
||||
# endif |
||||
#endif |
||||
|
||||
#endif /* ARM_FUNCTIONS_H_ */ |
@ -1,24 +0,0 @@ |
||||
/* insert_string_acle.c -- insert_string integer hash variant using ACLE's CRC instructions
|
||||
* |
||||
* Copyright (C) 1995-2013 Jean-loup Gailly and Mark Adler |
||||
* For conditions of distribution and use, see copyright notice in zlib.h |
||||
* |
||||
*/ |
||||
|
||||
#ifdef ARM_ACLE |
||||
#include "acle_intrins.h" |
||||
#include "../../zbuild.h" |
||||
#include "../../deflate.h" |
||||
|
||||
#define HASH_CALC(s, h, val) \ |
||||
h = __crc32w(0, val) |
||||
|
||||
#define HASH_CALC_VAR h |
||||
#define HASH_CALC_VAR_INIT uint32_t h = 0 |
||||
|
||||
#define UPDATE_HASH Z_TARGET_CRC update_hash_acle |
||||
#define INSERT_STRING Z_TARGET_CRC insert_string_acle |
||||
#define QUICK_INSERT_STRING Z_TARGET_CRC quick_insert_string_acle |
||||
|
||||
#include "../../insert_string_tpl.h" |
||||
#endif |
@ -0,0 +1,54 @@ |
||||
/* adler32.c -- compute the Adler-32 checksum of a data stream
|
||||
* Copyright (C) 1995-2011, 2016 Mark Adler |
||||
* For conditions of distribution and use, see copyright notice in zlib.h |
||||
*/ |
||||
|
||||
#include "zbuild.h" |
||||
#include "functable.h" |
||||
#include "adler32_p.h" |
||||
|
||||
/* ========================================================================= */ |
||||
Z_INTERNAL uint32_t adler32_c(uint32_t adler, const uint8_t *buf, size_t len) { |
||||
uint32_t sum2; |
||||
unsigned n; |
||||
|
||||
/* split Adler-32 into component sums */ |
||||
sum2 = (adler >> 16) & 0xffff; |
||||
adler &= 0xffff; |
||||
|
||||
/* in case user likes doing a byte at a time, keep it fast */ |
||||
if (UNLIKELY(len == 1)) |
||||
return adler32_len_1(adler, buf, sum2); |
||||
|
||||
/* initial Adler-32 value (deferred check for len == 1 speed) */ |
||||
if (UNLIKELY(buf == NULL)) |
||||
return 1L; |
||||
|
||||
/* in case short lengths are provided, keep it somewhat fast */ |
||||
if (UNLIKELY(len < 16)) |
||||
return adler32_len_16(adler, buf, len, sum2); |
||||
|
||||
/* do length NMAX blocks -- requires just one modulo operation */ |
||||
while (len >= NMAX) { |
||||
len -= NMAX; |
||||
#ifdef UNROLL_MORE |
||||
n = NMAX / 16; /* NMAX is divisible by 16 */ |
||||
#else |
||||
n = NMAX / 8; /* NMAX is divisible by 8 */ |
||||
#endif |
||||
do { |
||||
#ifdef UNROLL_MORE |
||||
DO16(adler, sum2, buf); /* 16 sums unrolled */ |
||||
buf += 16; |
||||
#else |
||||
DO8(adler, sum2, buf, 0); /* 8 sums unrolled */ |
||||
buf += 8; |
||||
#endif |
||||
} while (--n); |
||||
adler %= BASE; |
||||
sum2 %= BASE; |
||||
} |
||||
|
||||
/* do remaining bytes (less than NMAX, still just one modulo) */ |
||||
return adler32_len_64(adler, buf, len, sum2); |
||||
} |
@ -0,0 +1,106 @@ |
||||
/* generic_functions.h -- generic C implementations for arch-specific functions.
|
||||
* For conditions of distribution and use, see copyright notice in zlib.h |
||||
*/ |
||||
|
||||
#ifndef GENERIC_FUNCTIONS_H_ |
||||
#define GENERIC_FUNCTIONS_H_ |
||||
|
||||
#include "zendian.h" |
||||
|
||||
Z_INTERNAL uint32_t crc32_fold_reset_c(crc32_fold *crc); |
||||
Z_INTERNAL void crc32_fold_copy_c(crc32_fold *crc, uint8_t *dst, const uint8_t *src, size_t len); |
||||
Z_INTERNAL void crc32_fold_c(crc32_fold *crc, const uint8_t *src, size_t len, uint32_t init_crc); |
||||
Z_INTERNAL uint32_t crc32_fold_final_c(crc32_fold *crc); |
||||
|
||||
Z_INTERNAL uint32_t adler32_fold_copy_c(uint32_t adler, uint8_t *dst, const uint8_t *src, size_t len); |
||||
|
||||
|
||||
typedef uint32_t (*adler32_func)(uint32_t adler, const uint8_t *buf, size_t len); |
||||
typedef uint32_t (*compare256_func)(const uint8_t *src0, const uint8_t *src1); |
||||
typedef uint32_t (*crc32_func)(uint32_t crc32, const uint8_t *buf, size_t len); |
||||
|
||||
uint32_t adler32_c(uint32_t adler, const uint8_t *buf, size_t len); |
||||
|
||||
uint32_t chunksize_c(void); |
||||
uint8_t* chunkmemset_safe_c(uint8_t *out, unsigned dist, unsigned len, unsigned left); |
||||
void inflate_fast_c(PREFIX3(stream) *strm, uint32_t start); |
||||
|
||||
uint32_t PREFIX(crc32_braid)(uint32_t crc, const uint8_t *buf, size_t len); |
||||
|
||||
uint32_t compare256_c(const uint8_t *src0, const uint8_t *src1); |
||||
#if defined(UNALIGNED_OK) && BYTE_ORDER == LITTLE_ENDIAN |
||||
uint32_t compare256_unaligned_16(const uint8_t *src0, const uint8_t *src1); |
||||
# ifdef HAVE_BUILTIN_CTZ |
||||
uint32_t compare256_unaligned_32(const uint8_t *src0, const uint8_t *src1); |
||||
# endif |
||||
# if defined(UNALIGNED64_OK) && defined(HAVE_BUILTIN_CTZLL) |
||||
uint32_t compare256_unaligned_64(const uint8_t *src0, const uint8_t *src1); |
||||
# endif |
||||
#endif |
||||
|
||||
typedef void (*slide_hash_func)(deflate_state *s); |
||||
|
||||
void slide_hash_c(deflate_state *s); |
||||
|
||||
uint32_t longest_match_c(deflate_state *const s, Pos cur_match); |
||||
# if defined(UNALIGNED_OK) && BYTE_ORDER == LITTLE_ENDIAN |
||||
uint32_t longest_match_unaligned_16(deflate_state *const s, Pos cur_match); |
||||
# ifdef HAVE_BUILTIN_CTZ |
||||
uint32_t longest_match_unaligned_32(deflate_state *const s, Pos cur_match); |
||||
# endif |
||||
# if defined(UNALIGNED64_OK) && defined(HAVE_BUILTIN_CTZLL) |
||||
uint32_t longest_match_unaligned_64(deflate_state *const s, Pos cur_match); |
||||
# endif |
||||
# endif |
||||
|
||||
uint32_t longest_match_slow_c(deflate_state *const s, Pos cur_match); |
||||
# if defined(UNALIGNED_OK) && BYTE_ORDER == LITTLE_ENDIAN |
||||
uint32_t longest_match_slow_unaligned_16(deflate_state *const s, Pos cur_match); |
||||
uint32_t longest_match_slow_unaligned_32(deflate_state *const s, Pos cur_match); |
||||
# ifdef UNALIGNED64_OK |
||||
uint32_t longest_match_slow_unaligned_64(deflate_state *const s, Pos cur_match); |
||||
# endif |
||||
# endif |
||||
|
||||
|
||||
// Select generic implementation for longest_match, longest_match_slow, longest_match_slow functions.
|
||||
#if defined(UNALIGNED_OK) && BYTE_ORDER == LITTLE_ENDIAN |
||||
# if defined(UNALIGNED64_OK) && defined(HAVE_BUILTIN_CTZLL) |
||||
# define longest_match_generic longest_match_unaligned_64 |
||||
# define longest_match_slow_generic longest_match_slow_unaligned_64 |
||||
# define compare256_generic compare256_unaligned_64 |
||||
# elif defined(HAVE_BUILTIN_CTZ) |
||||
# define longest_match_generic longest_match_unaligned_32 |
||||
# define longest_match_slow_generic longest_match_slow_unaligned_32 |
||||
# define compare256_generic compare256_unaligned_32 |
||||
# else |
||||
# define longest_match_generic longest_match_unaligned_16 |
||||
# define longest_match_slow_generic longest_match_slow_unaligned_16 |
||||
# define compare256_generic compare256_unaligned_16 |
||||
# endif |
||||
#else |
||||
# define longest_match_generic longest_match_c |
||||
# define longest_match_slow_generic longest_match_slow_c |
||||
# define compare256_generic compare256_c |
||||
#endif |
||||
|
||||
|
||||
#ifdef DISABLE_RUNTIME_CPU_DETECTION |
||||
// Generic code
|
||||
# define native_adler32 adler32_c |
||||
# define native_adler32_fold_copy adler32_fold_copy_c |
||||
# define native_chunkmemset_safe chunkmemset_safe_c |
||||
# define native_chunksize chunksize_c |
||||
# define native_crc32 PREFIX(crc32_braid) |
||||
# define native_crc32_fold crc32_fold_c |
||||
# define native_crc32_fold_copy crc32_fold_copy_c |
||||
# define native_crc32_fold_final crc32_fold_final_c |
||||
# define native_crc32_fold_reset crc32_fold_reset_c |
||||
# define native_inflate_fast inflate_fast_c |
||||
# define native_slide_hash slide_hash_c |
||||
# define native_longest_match longest_match_generic |
||||
# define native_longest_match_slow longest_match_slow_generic |
||||
# define native_compare256 compare256_generic |
||||
#endif |
||||
|
||||
#endif |
@ -1,6 +1,6 @@ |
||||
/* slide_hash.c -- slide hash table C implementation
|
||||
* |
||||
* Copyright (C) 1995-2013 Jean-loup Gailly and Mark Adler |
||||
* Copyright (C) 1995-2024 Jean-loup Gailly and Mark Adler |
||||
* For conditions of distribution and use, see copyright notice in zlib.h |
||||
*/ |
||||
|
@ -0,0 +1,67 @@ |
||||
/* power_functions.h -- POWER implementations for arch-specific functions.
|
||||
* Copyright (C) 2020 Matheus Castanho <msc@linux.ibm.com>, IBM |
||||
* Copyright (C) 2021 Mika T. Lindqvist <postmaster@raasu.org> |
||||
* For conditions of distribution and use, see copyright notice in zlib.h |
||||
*/ |
||||
|
||||
#ifndef POWER_FUNCTIONS_H_ |
||||
#define POWER_FUNCTIONS_H_ |
||||
|
||||
#ifdef PPC_VMX |
||||
uint32_t adler32_vmx(uint32_t adler, const uint8_t *buf, size_t len); |
||||
void slide_hash_vmx(deflate_state *s); |
||||
#endif |
||||
|
||||
#ifdef POWER8_VSX |
||||
uint32_t adler32_power8(uint32_t adler, const uint8_t *buf, size_t len); |
||||
uint32_t chunksize_power8(void); |
||||
uint8_t* chunkmemset_safe_power8(uint8_t *out, unsigned dist, unsigned len, unsigned left); |
||||
uint32_t crc32_power8(uint32_t crc, const uint8_t *buf, size_t len); |
||||
void slide_hash_power8(deflate_state *s); |
||||
void inflate_fast_power8(PREFIX3(stream) *strm, uint32_t start); |
||||
#endif |
||||
|
||||
#ifdef POWER9 |
||||
uint32_t compare256_power9(const uint8_t *src0, const uint8_t *src1); |
||||
uint32_t longest_match_power9(deflate_state *const s, Pos cur_match); |
||||
uint32_t longest_match_slow_power9(deflate_state *const s, Pos cur_match); |
||||
#endif |
||||
|
||||
|
||||
#ifdef DISABLE_RUNTIME_CPU_DETECTION |
||||
// Power - VMX
|
||||
# if defined(PPC_VMX) && defined(__ALTIVEC__) |
||||
# undef native_adler32 |
||||
# define native_adler32 adler32_vmx |
||||
# undef native_slide_hash |
||||
# define native_slide_hash slide_hash_vmx |
||||
# endif |
||||
// Power8 - VSX
|
||||
# if defined(POWER8_VSX) && defined(_ARCH_PWR8) && defined(__VSX__) |
||||
# undef native_adler32 |
||||
# define native_adler32 adler32_power8 |
||||
# undef native_chunkmemset_safe |
||||
# define native_chunkmemset_safe chunkmemset_safe_power8 |
||||
# undef native_chunksize |
||||
# define native_chunksize chunksize_power8 |
||||
# undef native_inflate_fast |
||||
# define native_inflate_fast inflate_fast_power8 |
||||
# undef native_slide_hash |
||||
# define native_slide_hash slide_hash_power8 |
||||
# endif |
||||
# if defined(POWER8_VSX_CRC32) && defined(_ARCH_PWR8) && defined(__VSX__) |
||||
# undef native_crc32 |
||||
# define native_crc32 crc32_power8 |
||||
# endif |
||||
// Power9
|
||||
# if defined(POWER9) && defined(_ARCH_PWR9) |
||||
# undef native_compare256 |
||||
# define native_compare256 compare256_power9 |
||||
# undef native_longest_match |
||||
# define native_longest_match longest_match_power9 |
||||
# undef native_longest_match_slow |
||||
# define native_longest_match_slow longest_match_slow_power9 |
||||
# endif |
||||
#endif |
||||
|
||||
#endif /* POWER_FUNCTIONS_H_ */ |
@ -0,0 +1,49 @@ |
||||
/* riscv_functions.h -- RISCV implementations for arch-specific functions.
|
||||
* |
||||
* Copyright (C) 2023 SiFive, Inc. All rights reserved. |
||||
* Contributed by Alex Chiang <alex.chiang@sifive.com> |
||||
* |
||||
* For conditions of distribution and use, see copyright notice in zlib.h |
||||
*/ |
||||
|
||||
#ifndef RISCV_FUNCTIONS_H_ |
||||
#define RISCV_FUNCTIONS_H_ |
||||
|
||||
#ifdef RISCV_RVV |
||||
uint32_t adler32_rvv(uint32_t adler, const uint8_t *buf, size_t len); |
||||
uint32_t adler32_fold_copy_rvv(uint32_t adler, uint8_t *dst, const uint8_t *src, size_t len); |
||||
uint32_t chunksize_rvv(void); |
||||
uint8_t* chunkmemset_safe_rvv(uint8_t *out, unsigned dist, unsigned len, unsigned left); |
||||
uint32_t compare256_rvv(const uint8_t *src0, const uint8_t *src1); |
||||
|
||||
uint32_t longest_match_rvv(deflate_state *const s, Pos cur_match); |
||||
uint32_t longest_match_slow_rvv(deflate_state *const s, Pos cur_match); |
||||
void slide_hash_rvv(deflate_state *s); |
||||
void inflate_fast_rvv(PREFIX3(stream) *strm, uint32_t start); |
||||
#endif |
||||
|
||||
#ifdef DISABLE_RUNTIME_CPU_DETECTION |
||||
// RISCV - RVV
|
||||
# if defined(RISCV_RVV) && defined(__riscv_v) && defined(__linux__) |
||||
# undef native_adler32 |
||||
# define native_adler32 adler32_rvv |
||||
# undef native_adler32_fold_copy |
||||
# define native_adler32_fold_copy adler32_fold_copy_rvv |
||||
# undef native_chunkmemset_safe |
||||
# define native_chunkmemset_safe chunkmemset_safe_rvv |
||||
# undef native_chunksize |
||||
# define native_chunksize chunksize_rvv |
||||
# undef native_compare256 |
||||
# define native_compare256 compare256_rvv |
||||
# undef native_inflate_fast |
||||
# define native_inflate_fast inflate_fast_rvv |
||||
# undef native_longest_match |
||||
# define native_longest_match longest_match_rvv |
||||
# undef native_longest_match_slow |
||||
# define native_longest_match_slow longest_match_slow_rvv |
||||
# undef native_slide_hash |
||||
# define native_slide_hash slide_hash_rvv |
||||
# endif |
||||
#endif |
||||
|
||||
#endif /* RISCV_FUNCTIONS_H_ */ |
@ -0,0 +1,48 @@ |
||||
# Makefile for zlib-ng
|
||||
# Copyright (C) 1995-2013 Jean-loup Gailly, Mark Adler
|
||||
# For conditions of distribution and use, see copyright notice in zlib.h
|
||||
|
||||
CC=
|
||||
CFLAGS=
|
||||
SFLAGS=
|
||||
INCLUDES=
|
||||
SUFFIX=
|
||||
VGFMAFLAG=
|
||||
NOLTOFLAG=
|
||||
|
||||
SRCDIR=.
|
||||
SRCTOP=../..
|
||||
TOPDIR=$(SRCTOP)
|
||||
|
||||
s390_features.o: |
||||
$(CC) $(CFLAGS) $(INCLUDES) -c -o $@ $(SRCDIR)/s390_features.c
|
||||
|
||||
s390_features.lo: |
||||
$(CC) $(SFLAGS) $(INCLUDES) -c -o $@ $(SRCDIR)/s390_features.c
|
||||
|
||||
dfltcc_deflate.o: |
||||
$(CC) $(CFLAGS) $(INCLUDES) -c -o $@ $(SRCDIR)/dfltcc_deflate.c
|
||||
|
||||
dfltcc_deflate.lo: |
||||
$(CC) $(SFLAGS) $(INCLUDES) -c -o $@ $(SRCDIR)/dfltcc_deflate.c
|
||||
|
||||
dfltcc_inflate.o: |
||||
$(CC) $(CFLAGS) $(INCLUDES) -c -o $@ $(SRCDIR)/dfltcc_inflate.c
|
||||
|
||||
dfltcc_inflate.lo: |
||||
$(CC) $(SFLAGS) $(INCLUDES) -c -o $@ $(SRCDIR)/dfltcc_inflate.c
|
||||
|
||||
crc32-vx.o: |
||||
$(CC) $(CFLAGS) $(VGFMAFLAG) $(NOLTOFLAG) $(INCLUDES) -c -o $@ $(SRCDIR)/crc32-vx.c
|
||||
|
||||
crc32-vx.lo: |
||||
$(CC) $(SFLAGS) $(VGFMAFLAG) $(NOLTOFLAG) $(INCLUDES) -c -o $@ $(SRCDIR)/crc32-vx.c
|
||||
|
||||
mostlyclean: clean |
||||
clean: |
||||
rm -f *.o *.lo *~
|
||||
rm -rf objs
|
||||
rm -f *.gcda *.gcno *.gcov
|
||||
|
||||
distclean: clean |
||||
rm -f Makefile
|
@ -0,0 +1,277 @@ |
||||
# Introduction |
||||
|
||||
This directory contains SystemZ deflate hardware acceleration support. |
||||
It can be enabled using the following build commands: |
||||
|
||||
$ ./configure --with-dfltcc-deflate --with-dfltcc-inflate |
||||
$ make |
||||
|
||||
or |
||||
|
||||
$ cmake -DWITH_DFLTCC_DEFLATE=1 -DWITH_DFLTCC_INFLATE=1 . |
||||
$ make |
||||
|
||||
When built like this, zlib-ng would compress using hardware on level 1, |
||||
and using software on all other levels. Decompression will always happen |
||||
in hardware. In order to enable hardware compression for levels 1-6 |
||||
(i.e. to make it used by default) one could add |
||||
`-DDFLTCC_LEVEL_MASK=0x7e` to CFLAGS when building zlib-ng. |
||||
|
||||
SystemZ deflate hardware acceleration is available on [IBM z15]( |
||||
https://www.ibm.com/products/z15) and newer machines under the name [ |
||||
"Integrated Accelerator for zEnterprise Data Compression"]( |
||||
https://www.ibm.com/support/z-content-solutions/compression/). The |
||||
programming interface to it is a machine instruction called DEFLATE |
||||
CONVERSION CALL (DFLTCC). It is documented in Chapter 26 of [Principles |
||||
of Operation](https://publibfp.dhe.ibm.com/epubs/pdf/a227832c.pdf). Both |
||||
the code and the rest of this document refer to this feature simply as |
||||
"DFLTCC". |
||||
|
||||
# Performance |
||||
|
||||
Performance figures are published [here]( |
||||
https://github.com/iii-i/zlib-ng/wiki/Performance-with-dfltcc-patch-applied-and-dfltcc-support-built-on-dfltcc-enabled-machine |
||||
). The compression speed-up can be as high as 110x and the decompression |
||||
speed-up can be as high as 15x. |
||||
|
||||
# Limitations |
||||
|
||||
Two DFLTCC compression calls with identical inputs are not guaranteed to |
||||
produce identical outputs. Therefore care should be taken when using |
||||
hardware compression when reproducible results are desired. In |
||||
particular, zlib-ng-specific `zng_deflateSetParams` call allows setting |
||||
`Z_DEFLATE_REPRODUCIBLE` parameter, which disables DFLTCC support for a |
||||
particular stream. |
||||
|
||||
DFLTCC does not support every single zlib-ng feature, in particular: |
||||
|
||||
* `inflate(Z_BLOCK)` and `inflate(Z_TREES)` |
||||
* `inflateMark()` |
||||
* `inflatePrime()` |
||||
* `inflateSyncPoint()` |
||||
|
||||
When used, these functions will either switch to software, or, in case |
||||
this is not possible, gracefully fail. |
||||
|
||||
# Code structure |
||||
|
||||
All SystemZ-specific code lives in `arch/s390` directory and is |
||||
integrated with the rest of zlib-ng using hook macros. |
||||
|
||||
## Hook macros |
||||
|
||||
DFLTCC takes as arguments a parameter block, an input buffer, an output |
||||
buffer, and a window. Parameter blocks are stored alongside zlib states; |
||||
buffers are forwarded from the caller; and window - which must be |
||||
4k-aligned and is always 64k large, is managed using the `PAD_WINDOW()`, |
||||
`WINDOW_PAD_SIZE`, `HINT_ALIGNED_WINDOW` and `DEFLATE_ADJUST_WINDOW_SIZE()` |
||||
and `INFLATE_ADJUST_WINDOW_SIZE()` hooks. |
||||
|
||||
Software and hardware window formats do not match, therefore, |
||||
`deflateSetDictionary()`, `deflateGetDictionary()`, `inflateSetDictionary()` |
||||
and `inflateGetDictionary()` need special handling, which is triggered using |
||||
`DEFLATE_SET_DICTIONARY_HOOK()`, `DEFLATE_GET_DICTIONARY_HOOK()`, |
||||
`INFLATE_SET_DICTIONARY_HOOK()` and `INFLATE_GET_DICTIONARY_HOOK()` macros. |
||||
|
||||
`deflateResetKeep()` and `inflateResetKeep()` update the DFLTCC |
||||
parameter block using `DEFLATE_RESET_KEEP_HOOK()` and |
||||
`INFLATE_RESET_KEEP_HOOK()` macros. |
||||
|
||||
`INFLATE_PRIME_HOOK()`, `INFLATE_MARK_HOOK()` and |
||||
`INFLATE_SYNC_POINT_HOOK()` macros make the respective unsupported |
||||
calls gracefully fail. |
||||
|
||||
`DEFLATE_PARAMS_HOOK()` implements switching between hardware and |
||||
software compression mid-stream using `deflateParams()`. Switching |
||||
normally entails flushing the current block, which might not be possible |
||||
in low memory situations. `deflateParams()` uses `DEFLATE_DONE()` hook |
||||
in order to detect and gracefully handle such situations. |
||||
|
||||
The algorithm implemented in hardware has different compression ratio |
||||
than the one implemented in software. `DEFLATE_BOUND_ADJUST_COMPLEN()` |
||||
and `DEFLATE_NEED_CONSERVATIVE_BOUND()` macros make `deflateBound()` |
||||
return the correct results for the hardware implementation. |
||||
|
||||
Actual compression and decompression are handled by `DEFLATE_HOOK()` and |
||||
`INFLATE_TYPEDO_HOOK()` macros. Since inflation with DFLTCC manages the |
||||
window on its own, calling `updatewindow()` is suppressed using |
||||
`INFLATE_NEED_UPDATEWINDOW()` macro. |
||||
|
||||
In addition to compression, DFLTCC computes CRC-32 and Adler-32 |
||||
checksums, therefore, whenever it's used, software checksumming is |
||||
suppressed using `DEFLATE_NEED_CHECKSUM()` and `INFLATE_NEED_CHECKSUM()` |
||||
macros. |
||||
|
||||
While software always produces reproducible compression results, this |
||||
is not the case for DFLTCC. Therefore, zlib-ng users are given the |
||||
ability to specify whether or not reproducible compression results |
||||
are required. While it is always possible to specify this setting |
||||
before the compression begins, it is not always possible to do so in |
||||
the middle of a deflate stream - the exact conditions for that are |
||||
determined by `DEFLATE_CAN_SET_REPRODUCIBLE()` macro. |
||||
|
||||
## SystemZ-specific code |
||||
|
||||
When zlib-ng is built with DFLTCC, the hooks described above are |
||||
converted to calls to functions, which are implemented in |
||||
`arch/s390/dfltcc_*` files. The functions can be grouped in three broad |
||||
categories: |
||||
|
||||
* Base DFLTCC support, e.g. wrapping the machine instruction - `dfltcc()`. |
||||
* Translating between software and hardware data formats, e.g. |
||||
`dfltcc_deflate_set_dictionary()`. |
||||
* Translating between software and hardware state machines, e.g. |
||||
`dfltcc_deflate()` and `dfltcc_inflate()`. |
||||
|
||||
The functions from the first two categories are fairly simple, however, |
||||
various quirks in both software and hardware state machines make the |
||||
functions from the third category quite complicated. |
||||
|
||||
### `dfltcc_deflate()` function |
||||
|
||||
This function is called by `deflate()` and has the following |
||||
responsibilities: |
||||
|
||||
* Checking whether DFLTCC can be used with the current stream. If this |
||||
is not the case, then it returns `0`, making `deflate()` use some |
||||
other function in order to compress in software. Otherwise it returns |
||||
`1`. |
||||
* Block management and Huffman table generation. DFLTCC ends blocks only |
||||
when explicitly instructed to do so by the software. Furthermore, |
||||
whether to use fixed or dynamic Huffman tables must also be determined |
||||
by the software. Since looking at data in order to gather statistics |
||||
would negate performance benefits, the following approach is used: the |
||||
first `DFLTCC_FIRST_FHT_BLOCK_SIZE` bytes are placed into a fixed |
||||
block, and every next `DFLTCC_BLOCK_SIZE` bytes are placed into |
||||
dynamic blocks. |
||||
* Writing EOBS. Block Closing Control bit in the parameter block |
||||
instructs DFLTCC to write EOBS, however, certain conditions need to be |
||||
met: input data length must be non-zero or Continuation Flag must be |
||||
set. To put this in simpler terms, DFLTCC will silently refuse to |
||||
write EOBS if this is the only thing that it is asked to do. Since the |
||||
code has to be able to emit EOBS in software anyway, in order to avoid |
||||
tricky corner cases Block Closing Control is never used. Whether to |
||||
write EOBS is instead controlled by `soft_bcc` variable. |
||||
* Triggering block post-processing. Depending on flush mode, `deflate()` |
||||
must perform various additional actions when a block or a stream ends. |
||||
`dfltcc_deflate()` informs `deflate()` about this using |
||||
`block_state *result` parameter. |
||||
* Converting software state fields into hardware parameter block fields, |
||||
and vice versa. For example, `wrap` and Check Value Type or `bi_valid` |
||||
and Sub-Byte Boundary. Certain fields cannot be translated and must |
||||
persist untouched in the parameter block between calls, for example, |
||||
Continuation Flag or Continuation State Buffer. |
||||
* Handling flush modes and low-memory situations. These aspects are |
||||
quite intertwined and pervasive. The general idea here is that the |
||||
code must not do anything in software - whether explicitly by e.g. |
||||
calling `send_eobs()`, or implicitly - by returning to `deflate()` |
||||
with certain return and `*result` values, when Continuation Flag is |
||||
set. |
||||
* Ending streams. When a new block is started and flush mode is |
||||
`Z_FINISH`, Block Header Final parameter block bit is used to mark |
||||
this block as final. However, sometimes an empty final block is |
||||
needed, and, unfortunately, just like with EOBS, DFLTCC will silently |
||||
refuse to do this. The general idea of DFLTCC implementation is to |
||||
rely as much as possible on the existing code. Here in order to do |
||||
this, the code pretends that it does not support DFLTCC, which makes |
||||
`deflate()` call a software compression function, which writes an |
||||
empty final block. Whether this is required is controlled by |
||||
`need_empty_block` variable. |
||||
* Error handling. This is simply converting |
||||
Operation-Ending-Supplemental Code to string. Errors can only happen |
||||
due to things like memory corruption, and therefore they don't affect |
||||
the `deflate()` return code. |
||||
|
||||
### `dfltcc_inflate()` function |
||||
|
||||
This function is called by `inflate()` from the `TYPEDO` state (that is, |
||||
when all the metadata is parsed and the stream is positioned at the type |
||||
bits of deflate block header) and it's responsible for the following: |
||||
|
||||
* Falling back to software when flush mode is `Z_BLOCK` or `Z_TREES`. |
||||
Unfortunately, there is no way to ask DFLTCC to stop decompressing on |
||||
block or tree boundary. |
||||
* `inflate()` decompression loop management. This is controlled using |
||||
the return value, which can be either `DFLTCC_INFLATE_BREAK` or |
||||
`DFLTCC_INFLATE_CONTINUE`. |
||||
* Converting software state fields into hardware parameter block fields, |
||||
and vice versa. For example, `whave` and History Length or `wnext` and |
||||
History Offset. |
||||
* Ending streams. This instructs `inflate()` to return `Z_STREAM_END` |
||||
and is controlled by `last` state field. |
||||
* Error handling. Like deflate, error handling comprises |
||||
Operation-Ending-Supplemental Code to string conversion. Unlike |
||||
deflate, errors may happen due to bad inputs, therefore they are |
||||
propagated to `inflate()` by setting `mode` field to `MEM` or `BAD`. |
||||
|
||||
# Testing |
||||
|
||||
Given complexity of DFLTCC machine instruction, it is not clear whether |
||||
QEMU TCG will ever support it. At the time of writing, one has to have |
||||
access to an IBM z15+ VM or LPAR in order to test DFLTCC support. Since |
||||
DFLTCC is a non-privileged instruction, neither special VM/LPAR |
||||
configuration nor root are required. |
||||
|
||||
zlib-ng CI uses an IBM-provided z15 self-hosted builder for the DFLTCC |
||||
testing. There is no official IBM Z GitHub Actions runner, so we build |
||||
one inspired by `anup-kodlekere/gaplib`. |
||||
Future updates to actions-runner might need an updated patch. The .net |
||||
version number patch has been separated into a separate file to avoid a |
||||
need for constantly changing the patch. |
||||
|
||||
## Configuring the builder. |
||||
|
||||
### Install prerequisites. |
||||
|
||||
``` |
||||
sudo dnf install podman |
||||
``` |
||||
|
||||
### Add actions-runner service. |
||||
|
||||
``` |
||||
sudo cp self-hosted-builder/actions-runner.service /etc/systemd/system/ |
||||
sudo systemctl daemon-reload |
||||
``` |
||||
|
||||
### Create a config file, needs github personal access token. |
||||
|
||||
``` |
||||
# Create file /etc/actions-runner |
||||
repo=<owner>/<name> |
||||
access_token=<ghp_***> |
||||
``` |
||||
|
||||
Access token should have the repo scope, consult |
||||
https://docs.github.com/en/rest/reference/actions#create-a-registration-token-for-a-repository |
||||
for details. |
||||
|
||||
### Autostart actions-runner. |
||||
|
||||
``` |
||||
$ sudo systemctl enable --now actions-runner |
||||
``` |
||||
|
||||
## Rebuilding the container |
||||
|
||||
In order to update the `gaplib-actions-runner` podman container, e.g. to get the |
||||
latest OS security fixes, follow these steps: |
||||
``` |
||||
# Stop actions-runner service |
||||
sudo systemctl stop actions-runner |
||||
|
||||
# Delete old container |
||||
sudo podman container rm gaplib-actions-runner |
||||
|
||||
# Delete old image |
||||
sudo podman image rm localhost/zlib-ng/actions-runner |
||||
|
||||
# Build image |
||||
sudo podman build --squash -f Dockerfile.zlib-ng --tag zlib-ng/actions-runner --build-arg . |
||||
|
||||
# Build container |
||||
sudo podman create --name=gaplib-actions-runner --env-file=/etc/actions-runner --init --interactive --volume=actions-runner-temp:/home/actions-runner zlib-ng/actions-runner |
||||
|
||||
# Start actions-runner service |
||||
sudo systemctl start actions-runner |
||||
``` |
@ -0,0 +1,222 @@ |
||||
/*
|
||||
* Hardware-accelerated CRC-32 variants for Linux on z Systems |
||||
* |
||||
* Use the z/Architecture Vector Extension Facility to accelerate the |
||||
* computing of bitreflected CRC-32 checksums. |
||||
* |
||||
* This CRC-32 implementation algorithm is bitreflected and processes |
||||
* the least-significant bit first (Little-Endian). |
||||
* |
||||
* This code was originally written by Hendrik Brueckner |
||||
* <brueckner@linux.vnet.ibm.com> for use in the Linux kernel and has been |
||||
* relicensed under the zlib license. |
||||
*/ |
||||
|
||||
#include "zbuild.h" |
||||
#include "arch_functions.h" |
||||
|
||||
#include <vecintrin.h> |
||||
|
||||
typedef unsigned char uv16qi __attribute__((vector_size(16))); |
||||
typedef unsigned int uv4si __attribute__((vector_size(16))); |
||||
typedef unsigned long long uv2di __attribute__((vector_size(16))); |
||||
|
||||
static uint32_t crc32_le_vgfm_16(uint32_t crc, const uint8_t *buf, size_t len) { |
||||
/*
|
||||
* The CRC-32 constant block contains reduction constants to fold and |
||||
* process particular chunks of the input data stream in parallel. |
||||
* |
||||
* For the CRC-32 variants, the constants are precomputed according to |
||||
* these definitions: |
||||
* |
||||
* R1 = [(x4*128+32 mod P'(x) << 32)]' << 1 |
||||
* R2 = [(x4*128-32 mod P'(x) << 32)]' << 1 |
||||
* R3 = [(x128+32 mod P'(x) << 32)]' << 1 |
||||
* R4 = [(x128-32 mod P'(x) << 32)]' << 1 |
||||
* R5 = [(x64 mod P'(x) << 32)]' << 1 |
||||
* R6 = [(x32 mod P'(x) << 32)]' << 1 |
||||
* |
||||
* The bitreflected Barret reduction constant, u', is defined as |
||||
* the bit reversal of floor(x**64 / P(x)). |
||||
* |
||||
* where P(x) is the polynomial in the normal domain and the P'(x) is the |
||||
* polynomial in the reversed (bitreflected) domain. |
||||
* |
||||
* CRC-32 (IEEE 802.3 Ethernet, ...) polynomials: |
||||
* |
||||
* P(x) = 0x04C11DB7 |
||||
* P'(x) = 0xEDB88320 |
||||
*/ |
||||
const uv16qi perm_le2be = {15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0}; /* BE->LE mask */ |
||||
const uv2di r2r1 = {0x1C6E41596, 0x154442BD4}; /* R2, R1 */ |
||||
const uv2di r4r3 = {0x0CCAA009E, 0x1751997D0}; /* R4, R3 */ |
||||
const uv2di r5 = {0, 0x163CD6124}; /* R5 */ |
||||
const uv2di ru_poly = {0, 0x1F7011641}; /* u' */ |
||||
const uv2di crc_poly = {0, 0x1DB710641}; /* P'(x) << 1 */ |
||||
|
||||
/*
|
||||
* Load the initial CRC value. |
||||
* |
||||
* The CRC value is loaded into the rightmost word of the |
||||
* vector register and is later XORed with the LSB portion |
||||
* of the loaded input data. |
||||
*/ |
||||
uv2di v0 = {0, 0}; |
||||
v0 = (uv2di)vec_insert(crc, (uv4si)v0, 3); |
||||
|
||||
/* Load a 64-byte data chunk and XOR with CRC */ |
||||
uv2di v1 = vec_perm(((uv2di *)buf)[0], ((uv2di *)buf)[0], perm_le2be); |
||||
uv2di v2 = vec_perm(((uv2di *)buf)[1], ((uv2di *)buf)[1], perm_le2be); |
||||
uv2di v3 = vec_perm(((uv2di *)buf)[2], ((uv2di *)buf)[2], perm_le2be); |
||||
uv2di v4 = vec_perm(((uv2di *)buf)[3], ((uv2di *)buf)[3], perm_le2be); |
||||
|
||||
v1 ^= v0; |
||||
buf += 64; |
||||
len -= 64; |
||||
|
||||
while (len >= 64) { |
||||
/* Load the next 64-byte data chunk */ |
||||
uv16qi part1 = vec_perm(((uv16qi *)buf)[0], ((uv16qi *)buf)[0], perm_le2be); |
||||
uv16qi part2 = vec_perm(((uv16qi *)buf)[1], ((uv16qi *)buf)[1], perm_le2be); |
||||
uv16qi part3 = vec_perm(((uv16qi *)buf)[2], ((uv16qi *)buf)[2], perm_le2be); |
||||
uv16qi part4 = vec_perm(((uv16qi *)buf)[3], ((uv16qi *)buf)[3], perm_le2be); |
||||
|
||||
/*
|
||||
* Perform a GF(2) multiplication of the doublewords in V1 with |
||||
* the R1 and R2 reduction constants in V0. The intermediate result |
||||
* is then folded (accumulated) with the next data chunk in PART1 and |
||||
* stored in V1. Repeat this step for the register contents |
||||
* in V2, V3, and V4 respectively. |
||||
*/ |
||||
v1 = (uv2di)vec_gfmsum_accum_128(r2r1, v1, part1); |
||||
v2 = (uv2di)vec_gfmsum_accum_128(r2r1, v2, part2); |
||||
v3 = (uv2di)vec_gfmsum_accum_128(r2r1, v3, part3); |
||||
v4 = (uv2di)vec_gfmsum_accum_128(r2r1, v4, part4); |
||||
|
||||
buf += 64; |
||||
len -= 64; |
||||
} |
||||
|
||||
/*
|
||||
* Fold V1 to V4 into a single 128-bit value in V1. Multiply V1 with R3 |
||||
* and R4 and accumulating the next 128-bit chunk until a single 128-bit |
||||
* value remains. |
||||
*/ |
||||
v1 = (uv2di)vec_gfmsum_accum_128(r4r3, v1, (uv16qi)v2); |
||||
v1 = (uv2di)vec_gfmsum_accum_128(r4r3, v1, (uv16qi)v3); |
||||
v1 = (uv2di)vec_gfmsum_accum_128(r4r3, v1, (uv16qi)v4); |
||||
|
||||
while (len >= 16) { |
||||
/* Load next data chunk */ |
||||
v2 = vec_perm(*(uv2di *)buf, *(uv2di *)buf, perm_le2be); |
||||
|
||||
/* Fold next data chunk */ |
||||
v1 = (uv2di)vec_gfmsum_accum_128(r4r3, v1, (uv16qi)v2); |
||||
|
||||
buf += 16; |
||||
len -= 16; |
||||
} |
||||
|
||||
/*
|
||||
* Set up a vector register for byte shifts. The shift value must |
||||
* be loaded in bits 1-4 in byte element 7 of a vector register. |
||||
* Shift by 8 bytes: 0x40 |
||||
* Shift by 4 bytes: 0x20 |
||||
*/ |
||||
uv16qi v9 = {0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0}; |
||||
v9 = vec_insert((unsigned char)0x40, v9, 7); |
||||
|
||||
/*
|
||||
* Prepare V0 for the next GF(2) multiplication: shift V0 by 8 bytes |
||||
* to move R4 into the rightmost doubleword and set the leftmost |
||||
* doubleword to 0x1. |
||||
*/ |
||||
v0 = vec_srb(r4r3, (uv2di)v9); |
||||
v0[0] = 1; |
||||
|
||||
/*
|
||||
* Compute GF(2) product of V1 and V0. The rightmost doubleword |
||||
* of V1 is multiplied with R4. The leftmost doubleword of V1 is |
||||
* multiplied by 0x1 and is then XORed with rightmost product. |
||||
* Implicitly, the intermediate leftmost product becomes padded |
||||
*/ |
||||
v1 = (uv2di)vec_gfmsum_128(v0, v1); |
||||
|
||||
/*
|
||||
* Now do the final 32-bit fold by multiplying the rightmost word |
||||
* in V1 with R5 and XOR the result with the remaining bits in V1. |
||||
* |
||||
* To achieve this by a single VGFMAG, right shift V1 by a word |
||||
* and store the result in V2 which is then accumulated. Use the |
||||
* vector unpack instruction to load the rightmost half of the |
||||
* doubleword into the rightmost doubleword element of V1; the other |
||||
* half is loaded in the leftmost doubleword. |
||||
* The vector register with CONST_R5 contains the R5 constant in the |
||||
* rightmost doubleword and the leftmost doubleword is zero to ignore |
||||
* the leftmost product of V1. |
||||
*/ |
||||
v9 = vec_insert((unsigned char)0x20, v9, 7); |
||||
v2 = vec_srb(v1, (uv2di)v9); |
||||
v1 = vec_unpackl((uv4si)v1); /* Split rightmost doubleword */ |
||||
v1 = (uv2di)vec_gfmsum_accum_128(r5, v1, (uv16qi)v2); |
||||
|
||||
/*
|
||||
* Apply a Barret reduction to compute the final 32-bit CRC value. |
||||
* |
||||
* The input values to the Barret reduction are the degree-63 polynomial |
||||
* in V1 (R(x)), degree-32 generator polynomial, and the reduction |
||||
* constant u. The Barret reduction result is the CRC value of R(x) mod |
||||
* P(x). |
||||
* |
||||
* The Barret reduction algorithm is defined as: |
||||
* |
||||
* 1. T1(x) = floor( R(x) / x^32 ) GF2MUL u |
||||
* 2. T2(x) = floor( T1(x) / x^32 ) GF2MUL P(x) |
||||
* 3. C(x) = R(x) XOR T2(x) mod x^32 |
||||
* |
||||
* Note: The leftmost doubleword of vector register containing |
||||
* CONST_RU_POLY is zero and, thus, the intermediate GF(2) product |
||||
* is zero and does not contribute to the final result. |
||||
*/ |
||||
|
||||
/* T1(x) = floor( R(x) / x^32 ) GF2MUL u */ |
||||
v2 = vec_unpackl((uv4si)v1); |
||||
v2 = (uv2di)vec_gfmsum_128(ru_poly, v2); |
||||
|
||||
/*
|
||||
* Compute the GF(2) product of the CRC polynomial with T1(x) in |
||||
* V2 and XOR the intermediate result, T2(x), with the value in V1. |
||||
* The final result is stored in word element 2 of V2. |
||||
*/ |
||||
v2 = vec_unpackl((uv4si)v2); |
||||
v2 = (uv2di)vec_gfmsum_accum_128(crc_poly, v2, (uv16qi)v1); |
||||
|
||||
return ((uv4si)v2)[2]; |
||||
} |
||||
|
||||
#define VX_MIN_LEN 64 |
||||
#define VX_ALIGNMENT 16L |
||||
#define VX_ALIGN_MASK (VX_ALIGNMENT - 1) |
||||
|
||||
uint32_t Z_INTERNAL crc32_s390_vx(uint32_t crc, const unsigned char *buf, size_t len) { |
||||
size_t prealign, aligned, remaining; |
||||
|
||||
if (len < VX_MIN_LEN + VX_ALIGN_MASK) |
||||
return PREFIX(crc32_braid)(crc, buf, len); |
||||
|
||||
if ((uintptr_t)buf & VX_ALIGN_MASK) { |
||||
prealign = VX_ALIGNMENT - ((uintptr_t)buf & VX_ALIGN_MASK); |
||||
len -= prealign; |
||||
crc = PREFIX(crc32_braid)(crc, buf, prealign); |
||||
buf += prealign; |
||||
} |
||||
aligned = len & ~VX_ALIGN_MASK; |
||||
remaining = len & VX_ALIGN_MASK; |
||||
|
||||
crc = crc32_le_vgfm_16(crc ^ 0xffffffff, buf, aligned) ^ 0xffffffff; |
||||
|
||||
if (remaining) |
||||
crc = PREFIX(crc32_braid)(crc, buf + aligned, remaining); |
||||
|
||||
return crc; |
||||
} |
@ -0,0 +1,119 @@ |
||||
#ifndef DFLTCC_COMMON_H |
||||
#define DFLTCC_COMMON_H |
||||
|
||||
#include "zutil.h" |
||||
|
||||
/*
|
||||
Parameter Block for Query Available Functions. |
||||
*/ |
||||
struct dfltcc_qaf_param { |
||||
char fns[16]; |
||||
char reserved1[8]; |
||||
char fmts[2]; |
||||
char reserved2[6]; |
||||
} ALIGNED_(8); |
||||
|
||||
/*
|
||||
Parameter Block for Generate Dynamic-Huffman Table, Compress and Expand. |
||||
*/ |
||||
struct dfltcc_param_v0 { |
||||
uint16_t pbvn; /* Parameter-Block-Version Number */ |
||||
uint8_t mvn; /* Model-Version Number */ |
||||
uint8_t ribm; /* Reserved for IBM use */ |
||||
uint32_t reserved32 : 31; |
||||
uint32_t cf : 1; /* Continuation Flag */ |
||||
uint8_t reserved64[8]; |
||||
uint32_t nt : 1; /* New Task */ |
||||
uint32_t reserved129 : 1; |
||||
uint32_t cvt : 1; /* Check Value Type */ |
||||
uint32_t reserved131 : 1; |
||||
uint32_t htt : 1; /* Huffman-Table Type */ |
||||
uint32_t bcf : 1; /* Block-Continuation Flag */ |
||||
uint32_t bcc : 1; /* Block Closing Control */ |
||||
uint32_t bhf : 1; /* Block Header Final */ |
||||
uint32_t reserved136 : 1; |
||||
uint32_t reserved137 : 1; |
||||
uint32_t dhtgc : 1; /* DHT Generation Control */ |
||||
uint32_t reserved139 : 5; |
||||
uint32_t reserved144 : 5; |
||||
uint32_t sbb : 3; /* Sub-Byte Boundary */ |
||||
uint8_t oesc; /* Operation-Ending-Supplemental Code */ |
||||
uint32_t reserved160 : 12; |
||||
uint32_t ifs : 4; /* Incomplete-Function Status */ |
||||
uint16_t ifl; /* Incomplete-Function Length */ |
||||
uint8_t reserved192[8]; |
||||
uint8_t reserved256[8]; |
||||
uint8_t reserved320[4]; |
||||
uint16_t hl; /* History Length */ |
||||
uint32_t reserved368 : 1; |
||||
uint16_t ho : 15; /* History Offset */ |
||||
uint32_t cv; /* Check Value */ |
||||
uint32_t eobs : 15; /* End-of-block Symbol */ |
||||
uint32_t reserved431: 1; |
||||
uint8_t eobl : 4; /* End-of-block Length */ |
||||
uint32_t reserved436 : 12; |
||||
uint32_t reserved448 : 4; |
||||
uint16_t cdhtl : 12; /* Compressed-Dynamic-Huffman Table
|
||||
Length */ |
||||
uint8_t reserved464[6]; |
||||
uint8_t cdht[288]; /* Compressed-Dynamic-Huffman Table */ |
||||
uint8_t reserved[24]; |
||||
uint8_t ribm2[8]; /* Reserved for IBM use */ |
||||
uint8_t csb[1152]; /* Continuation-State Buffer */ |
||||
} ALIGNED_(8); |
||||
|
||||
/*
|
||||
Extension of inflate_state and deflate_state. |
||||
*/ |
||||
struct dfltcc_state { |
||||
struct dfltcc_param_v0 param; /* Parameter block. */ |
||||
struct dfltcc_qaf_param af; /* Available functions. */ |
||||
char msg[64]; /* Buffer for strm->msg */ |
||||
}; |
||||
|
||||
typedef struct { |
||||
struct dfltcc_state common; |
||||
uint16_t level_mask; /* Levels on which to use DFLTCC */ |
||||
uint32_t block_size; /* New block each X bytes */ |
||||
size_t block_threshold; /* New block after total_in > X */ |
||||
uint32_t dht_threshold; /* New block only if avail_in >= X */ |
||||
} arch_deflate_state; |
||||
|
||||
typedef struct { |
||||
struct dfltcc_state common; |
||||
} arch_inflate_state; |
||||
|
||||
/*
|
||||
History buffer size. |
||||
*/ |
||||
#define HB_BITS 15 |
||||
#define HB_SIZE (1 << HB_BITS) |
||||
|
||||
/*
|
||||
Sizes of deflate block parts. |
||||
*/ |
||||
#define DFLTCC_BLOCK_HEADER_BITS 3 |
||||
#define DFLTCC_HLITS_COUNT_BITS 5 |
||||
#define DFLTCC_HDISTS_COUNT_BITS 5 |
||||
#define DFLTCC_HCLENS_COUNT_BITS 4 |
||||
#define DFLTCC_MAX_HCLENS 19 |
||||
#define DFLTCC_HCLEN_BITS 3 |
||||
#define DFLTCC_MAX_HLITS 286 |
||||
#define DFLTCC_MAX_HDISTS 30 |
||||
#define DFLTCC_MAX_HLIT_HDIST_BITS 7 |
||||
#define DFLTCC_MAX_SYMBOL_BITS 16 |
||||
#define DFLTCC_MAX_EOBS_BITS 15 |
||||
#define DFLTCC_MAX_PADDING_BITS 7 |
||||
|
||||
#define DEFLATE_BOUND_COMPLEN(source_len) \ |
||||
((DFLTCC_BLOCK_HEADER_BITS + \
|
||||
DFLTCC_HLITS_COUNT_BITS + \
|
||||
DFLTCC_HDISTS_COUNT_BITS + \
|
||||
DFLTCC_HCLENS_COUNT_BITS + \
|
||||
DFLTCC_MAX_HCLENS * DFLTCC_HCLEN_BITS + \
|
||||
(DFLTCC_MAX_HLITS + DFLTCC_MAX_HDISTS) * DFLTCC_MAX_HLIT_HDIST_BITS + \
|
||||
(source_len) * DFLTCC_MAX_SYMBOL_BITS + \
|
||||
DFLTCC_MAX_EOBS_BITS + \
|
||||
DFLTCC_MAX_PADDING_BITS) >> 3) |
||||
|
||||
#endif |
@ -0,0 +1,383 @@ |
||||
/* dfltcc_deflate.c - IBM Z DEFLATE CONVERSION CALL compression support. */ |
||||
|
||||
/*
|
||||
Use the following commands to build zlib-ng with DFLTCC compression support: |
||||
|
||||
$ ./configure --with-dfltcc-deflate |
||||
or |
||||
|
||||
$ cmake -DWITH_DFLTCC_DEFLATE=1 . |
||||
|
||||
and then |
||||
|
||||
$ make |
||||
*/ |
||||
|
||||
#include "zbuild.h" |
||||
#include "deflate.h" |
||||
#include "trees_emit.h" |
||||
#include "dfltcc_deflate.h" |
||||
#include "dfltcc_detail.h" |
||||
|
||||
void Z_INTERNAL PREFIX(dfltcc_reset_deflate_state)(PREFIX3(streamp) strm) { |
||||
deflate_state *state = (deflate_state *)strm->state; |
||||
arch_deflate_state *dfltcc_state = &state->arch; |
||||
|
||||
dfltcc_reset_state(&dfltcc_state->common); |
||||
|
||||
/* Initialize tuning parameters */ |
||||
dfltcc_state->level_mask = DFLTCC_LEVEL_MASK; |
||||
dfltcc_state->block_size = DFLTCC_BLOCK_SIZE; |
||||
dfltcc_state->block_threshold = DFLTCC_FIRST_FHT_BLOCK_SIZE; |
||||
dfltcc_state->dht_threshold = DFLTCC_DHT_MIN_SAMPLE_SIZE; |
||||
} |
||||
|
||||
static inline int dfltcc_can_deflate_with_params(PREFIX3(streamp) strm, int level, uInt window_bits, int strategy, |
||||
int reproducible) { |
||||
deflate_state *state = (deflate_state *)strm->state; |
||||
arch_deflate_state *dfltcc_state = &state->arch; |
||||
|
||||
/* Unsupported compression settings */ |
||||
if ((dfltcc_state->level_mask & (1 << level)) == 0) |
||||
return 0; |
||||
if (window_bits != HB_BITS) |
||||
return 0; |
||||
if (strategy != Z_FIXED && strategy != Z_DEFAULT_STRATEGY) |
||||
return 0; |
||||
if (reproducible) |
||||
return 0; |
||||
|
||||
/* Unsupported hardware */ |
||||
if (!is_bit_set(dfltcc_state->common.af.fns, DFLTCC_GDHT) || |
||||
!is_bit_set(dfltcc_state->common.af.fns, DFLTCC_CMPR) || |
||||
!is_bit_set(dfltcc_state->common.af.fmts, DFLTCC_FMT0)) |
||||
return 0; |
||||
|
||||
return 1; |
||||
} |
||||
|
||||
int Z_INTERNAL PREFIX(dfltcc_can_deflate)(PREFIX3(streamp) strm) { |
||||
deflate_state *state = (deflate_state *)strm->state; |
||||
|
||||
return dfltcc_can_deflate_with_params(strm, state->level, state->w_bits, state->strategy, state->reproducible); |
||||
} |
||||
|
||||
static inline void dfltcc_gdht(PREFIX3(streamp) strm) { |
||||
deflate_state *state = (deflate_state *)strm->state; |
||||
struct dfltcc_param_v0 *param = &state->arch.common.param; |
||||
size_t avail_in = strm->avail_in; |
||||
|
||||
dfltcc(DFLTCC_GDHT, param, NULL, NULL, &strm->next_in, &avail_in, NULL); |
||||
} |
||||
|
||||
static inline dfltcc_cc dfltcc_cmpr(PREFIX3(streamp) strm) { |
||||
deflate_state *state = (deflate_state *)strm->state; |
||||
struct dfltcc_param_v0 *param = &state->arch.common.param; |
||||
size_t avail_in = strm->avail_in; |
||||
size_t avail_out = strm->avail_out; |
||||
dfltcc_cc cc; |
||||
|
||||
cc = dfltcc(DFLTCC_CMPR | HBT_CIRCULAR, |
||||
param, &strm->next_out, &avail_out, |
||||
&strm->next_in, &avail_in, state->window); |
||||
strm->total_in += (strm->avail_in - avail_in); |
||||
strm->total_out += (strm->avail_out - avail_out); |
||||
strm->avail_in = avail_in; |
||||
strm->avail_out = avail_out; |
||||
return cc; |
||||
} |
||||
|
||||
static inline void send_eobs(PREFIX3(streamp) strm, const struct dfltcc_param_v0 *param) { |
||||
deflate_state *state = (deflate_state *)strm->state; |
||||
|
||||
send_bits(state, PREFIX(bi_reverse)(param->eobs >> (15 - param->eobl), param->eobl), param->eobl, state->bi_buf, state->bi_valid); |
||||
PREFIX(flush_pending)(strm); |
||||
if (state->pending != 0) { |
||||
/* The remaining data is located in pending_out[0:pending]. If someone
|
||||
* calls put_byte() - this might happen in deflate() - the byte will be |
||||
* placed into pending_buf[pending], which is incorrect. Move the |
||||
* remaining data to the beginning of pending_buf so that put_byte() is |
||||
* usable again. |
||||
*/ |
||||
memmove(state->pending_buf, state->pending_out, state->pending); |
||||
state->pending_out = state->pending_buf; |
||||
} |
||||
#ifdef ZLIB_DEBUG |
||||
state->compressed_len += param->eobl; |
||||
#endif |
||||
} |
||||
|
||||
int Z_INTERNAL PREFIX(dfltcc_deflate)(PREFIX3(streamp) strm, int flush, block_state *result) { |
||||
deflate_state *state = (deflate_state *)strm->state; |
||||
arch_deflate_state *dfltcc_state = &state->arch; |
||||
struct dfltcc_param_v0 *param = &dfltcc_state->common.param; |
||||
uInt masked_avail_in; |
||||
dfltcc_cc cc; |
||||
int need_empty_block; |
||||
int soft_bcc; |
||||
int no_flush; |
||||
|
||||
if (!PREFIX(dfltcc_can_deflate)(strm)) { |
||||
/* Clear history. */ |
||||
if (flush == Z_FULL_FLUSH) |
||||
param->hl = 0; |
||||
return 0; |
||||
} |
||||
|
||||
again: |
||||
masked_avail_in = 0; |
||||
soft_bcc = 0; |
||||
no_flush = flush == Z_NO_FLUSH; |
||||
|
||||
/* No input data. Return, except when Continuation Flag is set, which means
|
||||
* that DFLTCC has buffered some output in the parameter block and needs to |
||||
* be called again in order to flush it. |
||||
*/ |
||||
if (strm->avail_in == 0 && !param->cf) { |
||||
/* A block is still open, and the hardware does not support closing
|
||||
* blocks without adding data. Thus, close it manually. |
||||
*/ |
||||
if (!no_flush && param->bcf) { |
||||
send_eobs(strm, param); |
||||
param->bcf = 0; |
||||
} |
||||
/* Let one of deflate_* functions write a trailing empty block. */ |
||||
if (flush == Z_FINISH) |
||||
return 0; |
||||
/* Clear history. */ |
||||
if (flush == Z_FULL_FLUSH) |
||||
param->hl = 0; |
||||
/* Trigger block post-processing if necessary. */ |
||||
*result = no_flush ? need_more : block_done; |
||||
return 1; |
||||
} |
||||
|
||||
/* There is an open non-BFINAL block, we are not going to close it just
|
||||
* yet, we have compressed more than DFLTCC_BLOCK_SIZE bytes and we see |
||||
* more than DFLTCC_DHT_MIN_SAMPLE_SIZE bytes. Open a new block with a new |
||||
* DHT in order to adapt to a possibly changed input data distribution. |
||||
*/ |
||||
if (param->bcf && no_flush && |
||||
strm->total_in > dfltcc_state->block_threshold && |
||||
strm->avail_in >= dfltcc_state->dht_threshold) { |
||||
if (param->cf) { |
||||
/* We need to flush the DFLTCC buffer before writing the
|
||||
* End-of-block Symbol. Mask the input data and proceed as usual. |
||||
*/ |
||||
masked_avail_in += strm->avail_in; |
||||
strm->avail_in = 0; |
||||
no_flush = 0; |
||||
} else { |
||||
/* DFLTCC buffer is empty, so we can manually write the
|
||||
* End-of-block Symbol right away. |
||||
*/ |
||||
send_eobs(strm, param); |
||||
param->bcf = 0; |
||||
dfltcc_state->block_threshold = strm->total_in + dfltcc_state->block_size; |
||||
} |
||||
} |
||||
|
||||
/* No space for compressed data. If we proceed, dfltcc_cmpr() will return
|
||||
* DFLTCC_CC_OP1_TOO_SHORT without buffering header bits, but we will still |
||||
* set BCF=1, which is wrong. Avoid complications and return early. |
||||
*/ |
||||
if (strm->avail_out == 0) { |
||||
*result = need_more; |
||||
return 1; |
||||
} |
||||
|
||||
/* The caller gave us too much data. Pass only one block worth of
|
||||
* uncompressed data to DFLTCC and mask the rest, so that on the next |
||||
* iteration we start a new block. |
||||
*/ |
||||
if (no_flush && strm->avail_in > dfltcc_state->block_size) { |
||||
masked_avail_in += (strm->avail_in - dfltcc_state->block_size); |
||||
strm->avail_in = dfltcc_state->block_size; |
||||
} |
||||
|
||||
/* When we have an open non-BFINAL deflate block and caller indicates that
|
||||
* the stream is ending, we need to close an open deflate block and open a |
||||
* BFINAL one. |
||||
*/ |
||||
need_empty_block = flush == Z_FINISH && param->bcf && !param->bhf; |
||||
|
||||
/* Translate stream to parameter block */ |
||||
param->cvt = state->wrap == 2 ? CVT_CRC32 : CVT_ADLER32; |
||||
if (!no_flush) |
||||
/* We need to close a block. Always do this in software - when there is
|
||||
* no input data, the hardware will not honor BCC. */ |
||||
soft_bcc = 1; |
||||
if (flush == Z_FINISH && !param->bcf) |
||||
/* We are about to open a BFINAL block, set Block Header Final bit
|
||||
* until the stream ends. |
||||
*/ |
||||
param->bhf = 1; |
||||
/* DFLTCC-CMPR will write to next_out, so make sure that buffers with
|
||||
* higher precedence are empty. |
||||
*/ |
||||
Assert(state->pending == 0, "There must be no pending bytes"); |
||||
Assert(state->bi_valid < 8, "There must be less than 8 pending bits"); |
||||
param->sbb = (unsigned int)state->bi_valid; |
||||
if (param->sbb > 0) |
||||
*strm->next_out = (unsigned char)state->bi_buf; |
||||
/* Honor history and check value */ |
||||
param->nt = 0; |
||||
if (state->wrap == 1) |
||||
param->cv = strm->adler; |
||||
else if (state->wrap == 2) |
||||
param->cv = ZSWAP32(state->crc_fold.value); |
||||
|
||||
/* When opening a block, choose a Huffman-Table Type */ |
||||
if (!param->bcf) { |
||||
if (state->strategy == Z_FIXED || (strm->total_in == 0 && dfltcc_state->block_threshold > 0)) |
||||
param->htt = HTT_FIXED; |
||||
else { |
||||
param->htt = HTT_DYNAMIC; |
||||
dfltcc_gdht(strm); |
||||
} |
||||
} |
||||
|
||||
/* Deflate */ |
||||
do { |
||||
cc = dfltcc_cmpr(strm); |
||||
if (strm->avail_in < 4096 && masked_avail_in > 0) |
||||
/* We are about to call DFLTCC with a small input buffer, which is
|
||||
* inefficient. Since there is masked data, there will be at least |
||||
* one more DFLTCC call, so skip the current one and make the next |
||||
* one handle more data. |
||||
*/ |
||||
break; |
||||
} while (cc == DFLTCC_CC_AGAIN); |
||||
|
||||
/* Translate parameter block to stream */ |
||||
strm->msg = oesc_msg(dfltcc_state->common.msg, param->oesc); |
||||
state->bi_valid = param->sbb; |
||||
if (state->bi_valid == 0) |
||||
state->bi_buf = 0; /* Avoid accessing next_out */ |
||||
else |
||||
state->bi_buf = *strm->next_out & ((1 << state->bi_valid) - 1); |
||||
if (state->wrap == 1) |
||||
strm->adler = param->cv; |
||||
else if (state->wrap == 2) |
||||
state->crc_fold.value = ZSWAP32(param->cv); |
||||
|
||||
/* Unmask the input data */ |
||||
strm->avail_in += masked_avail_in; |
||||
masked_avail_in = 0; |
||||
|
||||
/* If we encounter an error, it means there is a bug in DFLTCC call */ |
||||
Assert(cc != DFLTCC_CC_OP2_CORRUPT || param->oesc == 0, "BUG"); |
||||
|
||||
/* Update Block-Continuation Flag. It will be used to check whether to call
|
||||
* GDHT the next time. |
||||
*/ |
||||
if (cc == DFLTCC_CC_OK) { |
||||
if (soft_bcc) { |
||||
send_eobs(strm, param); |
||||
param->bcf = 0; |
||||
dfltcc_state->block_threshold = strm->total_in + dfltcc_state->block_size; |
||||
} else |
||||
param->bcf = 1; |
||||
if (flush == Z_FINISH) { |
||||
if (need_empty_block) |
||||
/* Make the current deflate() call also close the stream */ |
||||
return 0; |
||||
else { |
||||
bi_windup(state); |
||||
*result = finish_done; |
||||
} |
||||
} else { |
||||
if (flush == Z_FULL_FLUSH) |
||||
param->hl = 0; /* Clear history */ |
||||
*result = flush == Z_NO_FLUSH ? need_more : block_done; |
||||
} |
||||
} else { |
||||
param->bcf = 1; |
||||
*result = need_more; |
||||
} |
||||
if (strm->avail_in != 0 && strm->avail_out != 0) |
||||
goto again; /* deflate() must use all input or all output */ |
||||
return 1; |
||||
} |
||||
|
||||
/*
|
||||
Switching between hardware and software compression. |
||||
|
||||
DFLTCC does not support all zlib settings, e.g. generation of non-compressed |
||||
blocks or alternative window sizes. When such settings are applied on the |
||||
fly with deflateParams, we need to convert between hardware and software |
||||
window formats. |
||||
*/ |
||||
static int dfltcc_was_deflate_used(PREFIX3(streamp) strm) { |
||||
deflate_state *state = (deflate_state *)strm->state; |
||||
struct dfltcc_param_v0 *param = &state->arch.common.param; |
||||
|
||||
return strm->total_in > 0 || param->nt == 0 || param->hl > 0; |
||||
} |
||||
|
||||
int Z_INTERNAL PREFIX(dfltcc_deflate_params)(PREFIX3(streamp) strm, int level, int strategy, int *flush) { |
||||
deflate_state *state = (deflate_state *)strm->state; |
||||
int could_deflate = PREFIX(dfltcc_can_deflate)(strm); |
||||
int can_deflate = dfltcc_can_deflate_with_params(strm, level, state->w_bits, strategy, state->reproducible); |
||||
|
||||
if (can_deflate == could_deflate) |
||||
/* We continue to work in the same mode - no changes needed */ |
||||
return Z_OK; |
||||
|
||||
if (!dfltcc_was_deflate_used(strm)) |
||||
/* DFLTCC was not used yet - no changes needed */ |
||||
return Z_OK; |
||||
|
||||
/* For now, do not convert between window formats - simply get rid of the old data instead */ |
||||
*flush = Z_FULL_FLUSH; |
||||
return Z_OK; |
||||
} |
||||
|
||||
int Z_INTERNAL PREFIX(dfltcc_deflate_done)(PREFIX3(streamp) strm, int flush) { |
||||
deflate_state *state = (deflate_state *)strm->state; |
||||
struct dfltcc_param_v0 *param = &state->arch.common.param; |
||||
|
||||
/* When deflate(Z_FULL_FLUSH) is called with small avail_out, it might
|
||||
* close the block without resetting the compression state. Detect this |
||||
* situation and return that deflation is not done. |
||||
*/ |
||||
if (flush == Z_FULL_FLUSH && strm->avail_out == 0) |
||||
return 0; |
||||
|
||||
/* Return that deflation is not done if DFLTCC is used and either it
|
||||
* buffered some data (Continuation Flag is set), or has not written EOBS |
||||
* yet (Block-Continuation Flag is set). |
||||
*/ |
||||
return !PREFIX(dfltcc_can_deflate)(strm) || (!param->cf && !param->bcf); |
||||
} |
||||
|
||||
int Z_INTERNAL PREFIX(dfltcc_can_set_reproducible)(PREFIX3(streamp) strm, int reproducible) { |
||||
deflate_state *state = (deflate_state *)strm->state; |
||||
|
||||
return reproducible != state->reproducible && !dfltcc_was_deflate_used(strm); |
||||
} |
||||
|
||||
/*
|
||||
Preloading history. |
||||
*/ |
||||
int Z_INTERNAL PREFIX(dfltcc_deflate_set_dictionary)(PREFIX3(streamp) strm, |
||||
const unsigned char *dictionary, uInt dict_length) { |
||||
deflate_state *state = (deflate_state *)strm->state; |
||||
struct dfltcc_param_v0 *param = &state->arch.common.param; |
||||
|
||||
append_history(param, state->window, dictionary, dict_length); |
||||
state->strstart = 1; /* Add FDICT to zlib header */ |
||||
state->block_start = state->strstart; /* Make deflate_stored happy */ |
||||
return Z_OK; |
||||
} |
||||
|
||||
int Z_INTERNAL PREFIX(dfltcc_deflate_get_dictionary)(PREFIX3(streamp) strm, unsigned char *dictionary, uInt *dict_length) { |
||||
deflate_state *state = (deflate_state *)strm->state; |
||||
struct dfltcc_param_v0 *param = &state->arch.common.param; |
||||
|
||||
if (dictionary) |
||||
get_history(param, state->window, dictionary); |
||||
if (dict_length) |
||||
*dict_length = param->hl; |
||||
return Z_OK; |
||||
} |
@ -0,0 +1,58 @@ |
||||
#ifndef DFLTCC_DEFLATE_H |
||||
#define DFLTCC_DEFLATE_H |
||||
|
||||
#include "deflate.h" |
||||
#include "dfltcc_common.h" |
||||
|
||||
void Z_INTERNAL PREFIX(dfltcc_reset_deflate_state)(PREFIX3(streamp)); |
||||
int Z_INTERNAL PREFIX(dfltcc_can_deflate)(PREFIX3(streamp) strm); |
||||
int Z_INTERNAL PREFIX(dfltcc_deflate)(PREFIX3(streamp) strm, int flush, block_state *result); |
||||
int Z_INTERNAL PREFIX(dfltcc_deflate_params)(PREFIX3(streamp) strm, int level, int strategy, int *flush); |
||||
int Z_INTERNAL PREFIX(dfltcc_deflate_done)(PREFIX3(streamp) strm, int flush); |
||||
int Z_INTERNAL PREFIX(dfltcc_can_set_reproducible)(PREFIX3(streamp) strm, int reproducible); |
||||
int Z_INTERNAL PREFIX(dfltcc_deflate_set_dictionary)(PREFIX3(streamp) strm, |
||||
const unsigned char *dictionary, uInt dict_length); |
||||
int Z_INTERNAL PREFIX(dfltcc_deflate_get_dictionary)(PREFIX3(streamp) strm, unsigned char *dictionary, uInt* dict_length); |
||||
|
||||
#define DEFLATE_SET_DICTIONARY_HOOK(strm, dict, dict_len) \ |
||||
do { \
|
||||
if (PREFIX(dfltcc_can_deflate)((strm))) \
|
||||
return PREFIX(dfltcc_deflate_set_dictionary)((strm), (dict), (dict_len)); \
|
||||
} while (0) |
||||
|
||||
#define DEFLATE_GET_DICTIONARY_HOOK(strm, dict, dict_len) \ |
||||
do { \
|
||||
if (PREFIX(dfltcc_can_deflate)((strm))) \
|
||||
return PREFIX(dfltcc_deflate_get_dictionary)((strm), (dict), (dict_len)); \
|
||||
} while (0) |
||||
|
||||
#define DEFLATE_RESET_KEEP_HOOK PREFIX(dfltcc_reset_deflate_state) |
||||
|
||||
#define DEFLATE_PARAMS_HOOK(strm, level, strategy, hook_flush) \ |
||||
do { \
|
||||
int err; \
|
||||
\
|
||||
err = PREFIX(dfltcc_deflate_params)((strm), (level), (strategy), (hook_flush)); \
|
||||
if (err == Z_STREAM_ERROR) \
|
||||
return err; \
|
||||
} while (0) |
||||
|
||||
#define DEFLATE_DONE PREFIX(dfltcc_deflate_done) |
||||
|
||||
#define DEFLATE_BOUND_ADJUST_COMPLEN(strm, complen, source_len) \ |
||||
do { \
|
||||
if (deflateStateCheck((strm)) || PREFIX(dfltcc_can_deflate)((strm))) \
|
||||
(complen) = DEFLATE_BOUND_COMPLEN(source_len); \
|
||||
} while (0) |
||||
|
||||
#define DEFLATE_NEED_CONSERVATIVE_BOUND(strm) (PREFIX(dfltcc_can_deflate)((strm))) |
||||
|
||||
#define DEFLATE_HOOK PREFIX(dfltcc_deflate) |
||||
|
||||
#define DEFLATE_NEED_CHECKSUM(strm) (!PREFIX(dfltcc_can_deflate)((strm))) |
||||
|
||||
#define DEFLATE_CAN_SET_REPRODUCIBLE PREFIX(dfltcc_can_set_reproducible) |
||||
|
||||
#define DEFLATE_ADJUST_WINDOW_SIZE(n) MAX(n, HB_SIZE) |
||||
|
||||
#endif |
@ -0,0 +1,275 @@ |
||||
#include "zbuild.h" |
||||
#include <stdio.h> |
||||
|
||||
#ifdef HAVE_SYS_SDT_H |
||||
#include <sys/sdt.h> |
||||
#endif |
||||
|
||||
/*
|
||||
Tuning parameters. |
||||
*/ |
||||
#ifndef DFLTCC_LEVEL_MASK |
||||
#define DFLTCC_LEVEL_MASK 0x2 |
||||
#endif |
||||
#ifndef DFLTCC_BLOCK_SIZE |
||||
#define DFLTCC_BLOCK_SIZE 1048576 |
||||
#endif |
||||
#ifndef DFLTCC_FIRST_FHT_BLOCK_SIZE |
||||
#define DFLTCC_FIRST_FHT_BLOCK_SIZE 4096 |
||||
#endif |
||||
#ifndef DFLTCC_DHT_MIN_SAMPLE_SIZE |
||||
#define DFLTCC_DHT_MIN_SAMPLE_SIZE 4096 |
||||
#endif |
||||
#ifndef DFLTCC_RIBM |
||||
#define DFLTCC_RIBM 0 |
||||
#endif |
||||
|
||||
#define static_assert(c, msg) __attribute__((unused)) static char static_assert_failed_ ## msg[c ? 1 : -1] |
||||
|
||||
#define DFLTCC_SIZEOF_QAF 32 |
||||
static_assert(sizeof(struct dfltcc_qaf_param) == DFLTCC_SIZEOF_QAF, qaf); |
||||
|
||||
static inline int is_bit_set(const char *bits, int n) { |
||||
return bits[n / 8] & (1 << (7 - (n % 8))); |
||||
} |
||||
|
||||
static inline void clear_bit(char *bits, int n) { |
||||
bits[n / 8] &= ~(1 << (7 - (n % 8))); |
||||
} |
||||
|
||||
#define DFLTCC_FACILITY 151 |
||||
|
||||
static inline int is_dfltcc_enabled(void) { |
||||
uint64_t facilities[(DFLTCC_FACILITY / 64) + 1]; |
||||
Z_REGISTER uint8_t r0 __asm__("r0"); |
||||
|
||||
memset(facilities, 0, sizeof(facilities)); |
||||
r0 = sizeof(facilities) / sizeof(facilities[0]) - 1; |
||||
/* STFLE is supported since z9-109 and only in z/Architecture mode. When
|
||||
* compiling with -m31, gcc defaults to ESA mode, however, since the kernel |
||||
* is 64-bit, it's always z/Architecture mode at runtime. |
||||
*/ |
||||
__asm__ volatile( |
||||
#ifndef __clang__ |
||||
".machinemode push\n" |
||||
".machinemode zarch\n" |
||||
#endif |
||||
"stfle %[facilities]\n" |
||||
#ifndef __clang__ |
||||
".machinemode pop\n" |
||||
#endif |
||||
: [facilities] "=Q" (facilities), [r0] "+r" (r0) :: "cc"); |
||||
return is_bit_set((const char *)facilities, DFLTCC_FACILITY); |
||||
} |
||||
|
||||
#define DFLTCC_FMT0 0 |
||||
|
||||
#define CVT_CRC32 0 |
||||
#define CVT_ADLER32 1 |
||||
#define HTT_FIXED 0 |
||||
#define HTT_DYNAMIC 1 |
||||
|
||||
#define DFLTCC_SIZEOF_GDHT_V0 384 |
||||
#define DFLTCC_SIZEOF_CMPR_XPND_V0 1536 |
||||
static_assert(offsetof(struct dfltcc_param_v0, csb) == DFLTCC_SIZEOF_GDHT_V0, gdht_v0); |
||||
static_assert(sizeof(struct dfltcc_param_v0) == DFLTCC_SIZEOF_CMPR_XPND_V0, cmpr_xpnd_v0); |
||||
|
||||
static inline z_const char *oesc_msg(char *buf, int oesc) { |
||||
if (oesc == 0x00) |
||||
return NULL; /* Successful completion */ |
||||
else { |
||||
sprintf(buf, "Operation-Ending-Supplemental Code is 0x%.2X", oesc); |
||||
return buf; |
||||
} |
||||
} |
||||
|
||||
/*
|
||||
C wrapper for the DEFLATE CONVERSION CALL instruction. |
||||
*/ |
||||
typedef enum { |
||||
DFLTCC_CC_OK = 0, |
||||
DFLTCC_CC_OP1_TOO_SHORT = 1, |
||||
DFLTCC_CC_OP2_TOO_SHORT = 2, |
||||
DFLTCC_CC_OP2_CORRUPT = 2, |
||||
DFLTCC_CC_AGAIN = 3, |
||||
} dfltcc_cc; |
||||
|
||||
#define DFLTCC_QAF 0 |
||||
#define DFLTCC_GDHT 1 |
||||
#define DFLTCC_CMPR 2 |
||||
#define DFLTCC_XPND 4 |
||||
#define HBT_CIRCULAR (1 << 7) |
||||
#define DFLTCC_FN_MASK ((1 << 7) - 1) |
||||
|
||||
/* Return lengths of high (starting at param->ho) and low (starting at 0) fragments of the circular history buffer. */ |
||||
static inline void get_history_lengths(struct dfltcc_param_v0 *param, size_t *hl_high, size_t *hl_low) { |
||||
*hl_high = MIN(param->hl, HB_SIZE - param->ho); |
||||
*hl_low = param->hl - *hl_high; |
||||
} |
||||
|
||||
/* Notify instrumentation about an upcoming read/write access to the circular history buffer. */ |
||||
static inline void instrument_read_write_hist(struct dfltcc_param_v0 *param, void *hist) { |
||||
size_t hl_high, hl_low; |
||||
|
||||
get_history_lengths(param, &hl_high, &hl_low); |
||||
instrument_read_write(hist + param->ho, hl_high); |
||||
instrument_read_write(hist, hl_low); |
||||
} |
||||
|
||||
/* Notify MSan about a completed write to the circular history buffer. */ |
||||
static inline void msan_unpoison_hist(struct dfltcc_param_v0 *param, void *hist) { |
||||
size_t hl_high, hl_low; |
||||
|
||||
get_history_lengths(param, &hl_high, &hl_low); |
||||
__msan_unpoison(hist + param->ho, hl_high); |
||||
__msan_unpoison(hist, hl_low); |
||||
} |
||||
|
||||
static inline dfltcc_cc dfltcc(int fn, void *param, |
||||
unsigned char **op1, size_t *len1, |
||||
z_const unsigned char **op2, size_t *len2, void *hist) { |
||||
unsigned char *t2 = op1 ? *op1 : NULL; |
||||
unsigned char *orig_t2 = t2; |
||||
size_t t3 = len1 ? *len1 : 0; |
||||
z_const unsigned char *t4 = op2 ? *op2 : NULL; |
||||
size_t t5 = len2 ? *len2 : 0; |
||||
Z_REGISTER int r0 __asm__("r0"); |
||||
Z_REGISTER void *r1 __asm__("r1"); |
||||
Z_REGISTER unsigned char *r2 __asm__("r2"); |
||||
Z_REGISTER size_t r3 __asm__("r3"); |
||||
Z_REGISTER z_const unsigned char *r4 __asm__("r4"); |
||||
Z_REGISTER size_t r5 __asm__("r5"); |
||||
int cc; |
||||
|
||||
/* Insert pre-instrumentation for DFLTCC. */ |
||||
switch (fn & DFLTCC_FN_MASK) { |
||||
case DFLTCC_QAF: |
||||
instrument_write(param, DFLTCC_SIZEOF_QAF); |
||||
break; |
||||
case DFLTCC_GDHT: |
||||
instrument_read_write(param, DFLTCC_SIZEOF_GDHT_V0); |
||||
instrument_read(t4, t5); |
||||
break; |
||||
case DFLTCC_CMPR: |
||||
case DFLTCC_XPND: |
||||
instrument_read_write(param, DFLTCC_SIZEOF_CMPR_XPND_V0); |
||||
instrument_read(t4, t5); |
||||
instrument_write(t2, t3); |
||||
instrument_read_write_hist(param, hist); |
||||
break; |
||||
} |
||||
|
||||
r0 = fn; r1 = param; r2 = t2; r3 = t3; r4 = t4; r5 = t5; |
||||
__asm__ volatile( |
||||
#ifdef HAVE_SYS_SDT_H |
||||
STAP_PROBE_ASM(zlib, dfltcc_entry, STAP_PROBE_ASM_TEMPLATE(5)) |
||||
#endif |
||||
".insn rrf,0xb9390000,%[r2],%[r4],%[hist],0\n" |
||||
#ifdef HAVE_SYS_SDT_H |
||||
STAP_PROBE_ASM(zlib, dfltcc_exit, STAP_PROBE_ASM_TEMPLATE(5)) |
||||
#endif |
||||
"ipm %[cc]\n" |
||||
: [r2] "+r" (r2) |
||||
, [r3] "+r" (r3) |
||||
, [r4] "+r" (r4) |
||||
, [r5] "+r" (r5) |
||||
, [cc] "=r" (cc) |
||||
: [r0] "r" (r0) |
||||
, [r1] "r" (r1) |
||||
, [hist] "r" (hist) |
||||
#ifdef HAVE_SYS_SDT_H |
||||
, STAP_PROBE_ASM_OPERANDS(5, r2, r3, r4, r5, hist) |
||||
#endif |
||||
: "cc", "memory"); |
||||
t2 = r2; t3 = r3; t4 = r4; t5 = r5; |
||||
|
||||
/* Insert post-instrumentation for DFLTCC. */ |
||||
switch (fn & DFLTCC_FN_MASK) { |
||||
case DFLTCC_QAF: |
||||
__msan_unpoison(param, DFLTCC_SIZEOF_QAF); |
||||
break; |
||||
case DFLTCC_GDHT: |
||||
__msan_unpoison(param, DFLTCC_SIZEOF_GDHT_V0); |
||||
break; |
||||
case DFLTCC_CMPR: |
||||
__msan_unpoison(param, DFLTCC_SIZEOF_CMPR_XPND_V0); |
||||
__msan_unpoison(orig_t2, t2 - orig_t2 + (((struct dfltcc_param_v0 *)param)->sbb == 0 ? 0 : 1)); |
||||
msan_unpoison_hist(param, hist); |
||||
break; |
||||
case DFLTCC_XPND: |
||||
__msan_unpoison(param, DFLTCC_SIZEOF_CMPR_XPND_V0); |
||||
__msan_unpoison(orig_t2, t2 - orig_t2); |
||||
msan_unpoison_hist(param, hist); |
||||
break; |
||||
} |
||||
|
||||
if (op1) |
||||
*op1 = t2; |
||||
if (len1) |
||||
*len1 = t3; |
||||
if (op2) |
||||
*op2 = t4; |
||||
if (len2) |
||||
*len2 = t5; |
||||
return (cc >> 28) & 3; |
||||
} |
||||
|
||||
#define ALIGN_UP(p, size) (__typeof__(p))(((uintptr_t)(p) + ((size) - 1)) & ~((size) - 1)) |
||||
|
||||
static inline void dfltcc_reset_state(struct dfltcc_state *dfltcc_state) { |
||||
/* Initialize available functions */ |
||||
if (is_dfltcc_enabled()) { |
||||
dfltcc(DFLTCC_QAF, &dfltcc_state->param, NULL, NULL, NULL, NULL, NULL); |
||||
memmove(&dfltcc_state->af, &dfltcc_state->param, sizeof(dfltcc_state->af)); |
||||
} else |
||||
memset(&dfltcc_state->af, 0, sizeof(dfltcc_state->af)); |
||||
|
||||
/* Initialize parameter block */ |
||||
memset(&dfltcc_state->param, 0, sizeof(dfltcc_state->param)); |
||||
dfltcc_state->param.nt = 1; |
||||
dfltcc_state->param.ribm = DFLTCC_RIBM; |
||||
} |
||||
|
||||
static inline void dfltcc_copy_state(void *dst, const void *src, uInt size, uInt extension_size) { |
||||
memcpy(dst, src, ALIGN_UP(size, 8) + extension_size); |
||||
} |
||||
|
||||
static inline void append_history(struct dfltcc_param_v0 *param, unsigned char *history, |
||||
const unsigned char *buf, uInt count) { |
||||
size_t offset; |
||||
size_t n; |
||||
|
||||
/* Do not use more than 32K */ |
||||
if (count > HB_SIZE) { |
||||
buf += count - HB_SIZE; |
||||
count = HB_SIZE; |
||||
} |
||||
offset = (param->ho + param->hl) % HB_SIZE; |
||||
if (offset + count <= HB_SIZE) |
||||
/* Circular history buffer does not wrap - copy one chunk */ |
||||
memcpy(history + offset, buf, count); |
||||
else { |
||||
/* Circular history buffer wraps - copy two chunks */ |
||||
n = HB_SIZE - offset; |
||||
memcpy(history + offset, buf, n); |
||||
memcpy(history, buf + n, count - n); |
||||
} |
||||
n = param->hl + count; |
||||
if (n <= HB_SIZE) |
||||
/* All history fits into buffer - no need to discard anything */ |
||||
param->hl = n; |
||||
else { |
||||
/* History does not fit into buffer - discard extra bytes */ |
||||
param->ho = (param->ho + (n - HB_SIZE)) % HB_SIZE; |
||||
param->hl = HB_SIZE; |
||||
} |
||||
} |
||||
|
||||
static inline void get_history(struct dfltcc_param_v0 *param, const unsigned char *history, |
||||
unsigned char *buf) { |
||||
size_t hl_high, hl_low; |
||||
|
||||
get_history_lengths(param, &hl_high, &hl_low); |
||||
memcpy(buf, history + param->ho, hl_high); |
||||
memcpy(buf + hl_high, history, hl_low); |
||||
} |
@ -0,0 +1,191 @@ |
||||
/* dfltcc_inflate.c - IBM Z DEFLATE CONVERSION CALL decompression support. */ |
||||
|
||||
/*
|
||||
Use the following commands to build zlib-ng with DFLTCC decompression support: |
||||
|
||||
$ ./configure --with-dfltcc-inflate |
||||
or |
||||
|
||||
$ cmake -DWITH_DFLTCC_INFLATE=1 . |
||||
|
||||
and then |
||||
|
||||
$ make |
||||
*/ |
||||
|
||||
#include "zbuild.h" |
||||
#include "zutil.h" |
||||
#include "inftrees.h" |
||||
#include "inflate.h" |
||||
#include "dfltcc_inflate.h" |
||||
#include "dfltcc_detail.h" |
||||
|
||||
void Z_INTERNAL PREFIX(dfltcc_reset_inflate_state)(PREFIX3(streamp) strm) { |
||||
struct inflate_state *state = (struct inflate_state *)strm->state; |
||||
|
||||
dfltcc_reset_state(&state->arch.common); |
||||
} |
||||
|
||||
int Z_INTERNAL PREFIX(dfltcc_can_inflate)(PREFIX3(streamp) strm) { |
||||
struct inflate_state *state = (struct inflate_state *)strm->state; |
||||
struct dfltcc_state *dfltcc_state = &state->arch.common; |
||||
|
||||
/* Unsupported hardware */ |
||||
return is_bit_set(dfltcc_state->af.fns, DFLTCC_XPND) && is_bit_set(dfltcc_state->af.fmts, DFLTCC_FMT0); |
||||
} |
||||
|
||||
static inline dfltcc_cc dfltcc_xpnd(PREFIX3(streamp) strm) { |
||||
struct inflate_state *state = (struct inflate_state *)strm->state; |
||||
struct dfltcc_param_v0 *param = &state->arch.common.param; |
||||
size_t avail_in = strm->avail_in; |
||||
size_t avail_out = strm->avail_out; |
||||
dfltcc_cc cc; |
||||
|
||||
cc = dfltcc(DFLTCC_XPND | HBT_CIRCULAR, |
||||
param, &strm->next_out, &avail_out, |
||||
&strm->next_in, &avail_in, state->window); |
||||
strm->avail_in = avail_in; |
||||
strm->avail_out = avail_out; |
||||
return cc; |
||||
} |
||||
|
||||
dfltcc_inflate_action Z_INTERNAL PREFIX(dfltcc_inflate)(PREFIX3(streamp) strm, int flush, int *ret) { |
||||
struct inflate_state *state = (struct inflate_state *)strm->state; |
||||
struct dfltcc_state *dfltcc_state = &state->arch.common; |
||||
struct dfltcc_param_v0 *param = &dfltcc_state->param; |
||||
dfltcc_cc cc; |
||||
|
||||
if (flush == Z_BLOCK || flush == Z_TREES) { |
||||
/* DFLTCC does not support stopping on block boundaries */ |
||||
if (PREFIX(dfltcc_inflate_disable)(strm)) { |
||||
*ret = Z_STREAM_ERROR; |
||||
return DFLTCC_INFLATE_BREAK; |
||||
} else |
||||
return DFLTCC_INFLATE_SOFTWARE; |
||||
} |
||||
|
||||
if (state->last) { |
||||
if (state->bits != 0) { |
||||
strm->next_in++; |
||||
strm->avail_in--; |
||||
state->bits = 0; |
||||
} |
||||
state->mode = CHECK; |
||||
return DFLTCC_INFLATE_CONTINUE; |
||||
} |
||||
|
||||
if (strm->avail_in == 0 && !param->cf) |
||||
return DFLTCC_INFLATE_BREAK; |
||||
|
||||
/* if window not in use yet, initialize */ |
||||
if (state->wsize == 0) |
||||
state->wsize = 1U << state->wbits; |
||||
|
||||
/* Translate stream to parameter block */ |
||||
param->cvt = ((state->wrap & 4) && state->flags) ? CVT_CRC32 : CVT_ADLER32; |
||||
param->sbb = state->bits; |
||||
if (param->hl) |
||||
param->nt = 0; /* Honor history for the first block */ |
||||
if (state->wrap & 4) |
||||
param->cv = state->flags ? ZSWAP32(state->check) : state->check; |
||||
|
||||
/* Inflate */ |
||||
do { |
||||
cc = dfltcc_xpnd(strm); |
||||
} while (cc == DFLTCC_CC_AGAIN); |
||||
|
||||
/* Translate parameter block to stream */ |
||||
strm->msg = oesc_msg(dfltcc_state->msg, param->oesc); |
||||
state->last = cc == DFLTCC_CC_OK; |
||||
state->bits = param->sbb; |
||||
if (state->wrap & 4) |
||||
strm->adler = state->check = state->flags ? ZSWAP32(param->cv) : param->cv; |
||||
if (cc == DFLTCC_CC_OP2_CORRUPT && param->oesc != 0) { |
||||
/* Report an error if stream is corrupted */ |
||||
state->mode = BAD; |
||||
return DFLTCC_INFLATE_CONTINUE; |
||||
} |
||||
state->mode = TYPEDO; |
||||
/* Break if operands are exhausted, otherwise continue looping */ |
||||
return (cc == DFLTCC_CC_OP1_TOO_SHORT || cc == DFLTCC_CC_OP2_TOO_SHORT) ? |
||||
DFLTCC_INFLATE_BREAK : DFLTCC_INFLATE_CONTINUE; |
||||
} |
||||
|
||||
int Z_INTERNAL PREFIX(dfltcc_was_inflate_used)(PREFIX3(streamp) strm) { |
||||
struct inflate_state *state = (struct inflate_state *)strm->state; |
||||
|
||||
return !state->arch.common.param.nt; |
||||
} |
||||
|
||||
/*
|
||||
Rotates a circular buffer. |
||||
The implementation is based on https://cplusplus.com/reference/algorithm/rotate/
|
||||
*/ |
||||
static void rotate(unsigned char *start, unsigned char *pivot, unsigned char *end) { |
||||
unsigned char *p = pivot; |
||||
unsigned char tmp; |
||||
|
||||
while (p != start) { |
||||
tmp = *start; |
||||
*start = *p; |
||||
*p = tmp; |
||||
|
||||
start++; |
||||
p++; |
||||
|
||||
if (p == end) |
||||
p = pivot; |
||||
else if (start == pivot) |
||||
pivot = p; |
||||
} |
||||
} |
||||
|
||||
int Z_INTERNAL PREFIX(dfltcc_inflate_disable)(PREFIX3(streamp) strm) { |
||||
struct inflate_state *state = (struct inflate_state *)strm->state; |
||||
struct dfltcc_state *dfltcc_state = &state->arch.common; |
||||
struct dfltcc_param_v0 *param = &dfltcc_state->param; |
||||
|
||||
if (!PREFIX(dfltcc_can_inflate)(strm)) |
||||
return 0; |
||||
if (PREFIX(dfltcc_was_inflate_used)(strm)) |
||||
/* DFLTCC has already decompressed some data. Since there is not
|
||||
* enough information to resume decompression in software, the call |
||||
* must fail. |
||||
*/ |
||||
return 1; |
||||
/* DFLTCC was not used yet - decompress in software */ |
||||
memset(&dfltcc_state->af, 0, sizeof(dfltcc_state->af)); |
||||
/* Convert the window from the hardware to the software format */ |
||||
rotate(state->window, state->window + param->ho, state->window + HB_SIZE); |
||||
state->whave = state->wnext = MIN(param->hl, state->wsize); |
||||
return 0; |
||||
} |
||||
|
||||
/*
|
||||
Preloading history. |
||||
*/ |
||||
int Z_INTERNAL PREFIX(dfltcc_inflate_set_dictionary)(PREFIX3(streamp) strm, |
||||
const unsigned char *dictionary, uInt dict_length) { |
||||
struct inflate_state *state = (struct inflate_state *)strm->state; |
||||
struct dfltcc_param_v0 *param = &state->arch.common.param; |
||||
|
||||
/* if window not in use yet, initialize */ |
||||
if (state->wsize == 0) |
||||
state->wsize = 1U << state->wbits; |
||||
|
||||
append_history(param, state->window, dictionary, dict_length); |
||||
state->havedict = 1; |
||||
return Z_OK; |
||||
} |
||||
|
||||
int Z_INTERNAL PREFIX(dfltcc_inflate_get_dictionary)(PREFIX3(streamp) strm, |
||||
unsigned char *dictionary, uInt *dict_length) { |
||||
struct inflate_state *state = (struct inflate_state *)strm->state; |
||||
struct dfltcc_param_v0 *param = &state->arch.common.param; |
||||
|
||||
if (dictionary && state->window) |
||||
get_history(param, state->window, dictionary); |
||||
if (dict_length) |
||||
*dict_length = param->hl; |
||||
return Z_OK; |
||||
} |
@ -0,0 +1,67 @@ |
||||
#ifndef DFLTCC_INFLATE_H |
||||
#define DFLTCC_INFLATE_H |
||||
|
||||
#include "dfltcc_common.h" |
||||
|
||||
void Z_INTERNAL PREFIX(dfltcc_reset_inflate_state)(PREFIX3(streamp) strm); |
||||
int Z_INTERNAL PREFIX(dfltcc_can_inflate)(PREFIX3(streamp) strm); |
||||
typedef enum { |
||||
DFLTCC_INFLATE_CONTINUE, |
||||
DFLTCC_INFLATE_BREAK, |
||||
DFLTCC_INFLATE_SOFTWARE, |
||||
} dfltcc_inflate_action; |
||||
dfltcc_inflate_action Z_INTERNAL PREFIX(dfltcc_inflate)(PREFIX3(streamp) strm, int flush, int *ret); |
||||
int Z_INTERNAL PREFIX(dfltcc_was_inflate_used)(PREFIX3(streamp) strm); |
||||
int Z_INTERNAL PREFIX(dfltcc_inflate_disable)(PREFIX3(streamp) strm); |
||||
int Z_INTERNAL PREFIX(dfltcc_inflate_set_dictionary)(PREFIX3(streamp) strm, |
||||
const unsigned char *dictionary, uInt dict_length); |
||||
int Z_INTERNAL PREFIX(dfltcc_inflate_get_dictionary)(PREFIX3(streamp) strm, |
||||
unsigned char *dictionary, uInt* dict_length); |
||||
|
||||
#define INFLATE_RESET_KEEP_HOOK PREFIX(dfltcc_reset_inflate_state) |
||||
|
||||
#define INFLATE_PRIME_HOOK(strm, bits, value) \ |
||||
do { if (PREFIX(dfltcc_inflate_disable)((strm))) return Z_STREAM_ERROR; } while (0) |
||||
|
||||
#define INFLATE_TYPEDO_HOOK(strm, flush) \ |
||||
if (PREFIX(dfltcc_can_inflate)((strm))) { \
|
||||
dfltcc_inflate_action action; \
|
||||
\
|
||||
RESTORE(); \
|
||||
action = PREFIX(dfltcc_inflate)((strm), (flush), &ret); \
|
||||
LOAD(); \
|
||||
if (action == DFLTCC_INFLATE_CONTINUE) \
|
||||
break; \
|
||||
else if (action == DFLTCC_INFLATE_BREAK) \
|
||||
goto inf_leave; \
|
||||
} |
||||
|
||||
#define INFLATE_NEED_CHECKSUM(strm) (!PREFIX(dfltcc_can_inflate)((strm))) |
||||
|
||||
#define INFLATE_NEED_UPDATEWINDOW(strm) (!PREFIX(dfltcc_can_inflate)((strm))) |
||||
|
||||
#define INFLATE_MARK_HOOK(strm) \ |
||||
do { \
|
||||
if (PREFIX(dfltcc_was_inflate_used)((strm))) return -(1L << 16); \
|
||||
} while (0) |
||||
|
||||
#define INFLATE_SYNC_POINT_HOOK(strm) \ |
||||
do { \
|
||||
if (PREFIX(dfltcc_was_inflate_used)((strm))) return Z_STREAM_ERROR; \
|
||||
} while (0) |
||||
|
||||
#define INFLATE_SET_DICTIONARY_HOOK(strm, dict, dict_len) \ |
||||
do { \
|
||||
if (PREFIX(dfltcc_can_inflate)((strm))) \
|
||||
return PREFIX(dfltcc_inflate_set_dictionary)((strm), (dict), (dict_len)); \
|
||||
} while (0) |
||||
|
||||
#define INFLATE_GET_DICTIONARY_HOOK(strm, dict, dict_len) \ |
||||
do { \
|
||||
if (PREFIX(dfltcc_can_inflate)((strm))) \
|
||||
return PREFIX(dfltcc_inflate_get_dictionary)((strm), (dict), (dict_len)); \
|
||||
} while (0) |
||||
|
||||
#define INFLATE_ADJUST_WINDOW_SIZE(n) MAX(n, HB_SIZE) |
||||
|
||||
#endif |
@ -0,0 +1,14 @@ |
||||
#include "zbuild.h" |
||||
#include "s390_features.h" |
||||
|
||||
#ifdef HAVE_SYS_AUXV_H |
||||
# include <sys/auxv.h> |
||||
#endif |
||||
|
||||
#ifndef HWCAP_S390_VXRS |
||||
#define HWCAP_S390_VXRS HWCAP_S390_VX |
||||
#endif |
||||
|
||||
void Z_INTERNAL s390_check_features(struct s390_cpu_features *features) { |
||||
features->has_vx = getauxval(AT_HWCAP) & HWCAP_S390_VXRS; |
||||
} |
@ -0,0 +1,14 @@ |
||||
/* s390_features.h -- check for s390 features.
|
||||
* For conditions of distribution and use, see copyright notice in zlib.h |
||||
*/ |
||||
|
||||
#ifndef S390_FEATURES_H_ |
||||
#define S390_FEATURES_H_ |
||||
|
||||
struct s390_cpu_features { |
||||
int has_vx; |
||||
}; |
||||
|
||||
void Z_INTERNAL s390_check_features(struct s390_cpu_features *features); |
||||
|
||||
#endif |
@ -0,0 +1,20 @@ |
||||
/* s390_functions.h -- s390 implementations for arch-specific functions.
|
||||
* For conditions of distribution and use, see copyright notice in zlib.h |
||||
*/ |
||||
|
||||
#ifndef S390_FUNCTIONS_H_ |
||||
#define S390_FUNCTIONS_H_ |
||||
|
||||
#ifdef S390_CRC32_VX |
||||
uint32_t crc32_s390_vx(uint32_t crc, const uint8_t *buf, size_t len); |
||||
#endif |
||||
|
||||
|
||||
#ifdef DISABLE_RUNTIME_CPU_DETECTION |
||||
# if defined(S390_CRC32_VX) && defined(__zarch__) && __ARCH__ >= 11 && defined(__VX__) |
||||
# undef native_crc32 |
||||
# define native_crc32 = crc32_s390_vx |
||||
# endif |
||||
#endif |
||||
|
||||
#endif |
@ -0,0 +1,47 @@ |
||||
# Self-Hosted IBM Z Github Actions Runner. |
||||
|
||||
FROM almalinux:9 |
||||
|
||||
RUN dnf update -y -q && \ |
||||
dnf install -y -q --enablerepo=crb wget git which sudo jq \ |
||||
cmake make automake autoconf m4 libtool ninja-build python3-pip \ |
||||
gcc gcc-c++ clang llvm-toolset glibc-all-langpacks langpacks-en \ |
||||
glibc-static libstdc++-static libstdc++-devel libxslt-devel libxml2-devel |
||||
|
||||
RUN dnf install -y -q dotnet-sdk-6.0 && \ |
||||
echo "Using SDK - `dotnet --version`" |
||||
|
||||
COPY runner-s390x.patch /tmp/runner.patch |
||||
COPY runner-global.json /tmp/global.json |
||||
|
||||
RUN cd /tmp && \ |
||||
git clone -q https://github.com/actions/runner && \ |
||||
cd runner && \ |
||||
git checkout $(git describe --tags $(git rev-list --tags --max-count=1)) -b build && \ |
||||
git apply /tmp/runner.patch && \ |
||||
cp -f /tmp/global.json src/global.json |
||||
|
||||
|
||||
RUN cd /tmp/runner/src && \ |
||||
./dev.sh layout && \ |
||||
./dev.sh package && \ |
||||
rm -rf /root/.dotnet /root/.nuget |
||||
|
||||
RUN useradd -c "Action Runner" -m actions-runner && \ |
||||
usermod -L actions-runner |
||||
|
||||
RUN tar -xf /tmp/runner/_package/*.tar.gz -C /home/actions-runner && \ |
||||
chown -R actions-runner:actions-runner /home/actions-runner |
||||
|
||||
#VOLUME /home/actions-runner |
||||
|
||||
RUN rm -rf /tmp/runner /var/cache/dnf/* /tmp/runner.patch /tmp/global.json && \ |
||||
dnf clean all |
||||
|
||||
USER actions-runner |
||||
|
||||
# Scripts. |
||||
COPY fs/ / |
||||
WORKDIR /home/actions-runner |
||||
ENTRYPOINT ["/usr/bin/entrypoint"] |
||||
CMD ["/usr/bin/actions-runner"] |
@ -0,0 +1,18 @@ |
||||
[Unit] |
||||
Description=Podman container: Gaplib Github Actions Runner |
||||
Wants=network-online.target |
||||
After=network-online.target |
||||
StartLimitIntervalSec=1 |
||||
RequiresMountsFor=/run/user/1001/containers |
||||
|
||||
[Service] |
||||
Environment=PODMAN_SYSTEMD_UNIT=%n |
||||
Restart=always |
||||
TimeoutStopSec=61 |
||||
ExecStart=/usr/bin/podman start gaplib-actions-runner |
||||
ExecStop=/usr/bin/podman stop -t 1 gaplib-actions-runner |
||||
ExecStopPost=/usr/bin/podman stop -t 1 gaplib-actions-runner |
||||
Type=forking |
||||
|
||||
[Install] |
||||
WantedBy=default.target |
@ -0,0 +1,5 @@ |
||||
{ |
||||
"sdk": { |
||||
"version": "6.0.421" |
||||
} |
||||
} |
@ -1,24 +0,0 @@ |
||||
/* insert_string_sse42.c -- insert_string integer hash variant using SSE4.2's CRC instructions
|
||||
* |
||||
* Copyright (C) 1995-2013 Jean-loup Gailly and Mark Adler |
||||
* For conditions of distribution and use, see copyright notice in zlib.h |
||||
* |
||||
*/ |
||||
|
||||
#ifdef X86_SSE42 |
||||
#include "../../zbuild.h" |
||||
#include <nmmintrin.h> |
||||
#include "../../deflate.h" |
||||
|
||||
#define HASH_CALC(s, h, val)\ |
||||
h = _mm_crc32_u32(h, val) |
||||
|
||||
#define HASH_CALC_VAR h |
||||
#define HASH_CALC_VAR_INIT uint32_t h = 0 |
||||
|
||||
#define UPDATE_HASH update_hash_sse42 |
||||
#define INSERT_STRING insert_string_sse42 |
||||
#define QUICK_INSERT_STRING quick_insert_string_sse42 |
||||
|
||||
#include "../../insert_string_tpl.h" |
||||
#endif |
@ -0,0 +1,172 @@ |
||||
/* x86_functions.h -- x86 implementations for arch-specific functions.
|
||||
* Copyright (C) 2013 Intel Corporation Jim Kukunas |
||||
* For conditions of distribution and use, see copyright notice in zlib.h |
||||
*/ |
||||
|
||||
#ifndef X86_FUNCTIONS_H_ |
||||
#define X86_FUNCTIONS_H_ |
||||
|
||||
#ifdef X86_SSE2 |
||||
uint32_t chunksize_sse2(void); |
||||
uint8_t* chunkmemset_safe_sse2(uint8_t *out, unsigned dist, unsigned len, unsigned left); |
||||
|
||||
# ifdef HAVE_BUILTIN_CTZ |
||||
uint32_t compare256_sse2(const uint8_t *src0, const uint8_t *src1); |
||||
uint32_t longest_match_sse2(deflate_state *const s, Pos cur_match); |
||||
uint32_t longest_match_slow_sse2(deflate_state *const s, Pos cur_match); |
||||
void slide_hash_sse2(deflate_state *s); |
||||
# endif |
||||
void inflate_fast_sse2(PREFIX3(stream)* strm, uint32_t start); |
||||
#endif |
||||
|
||||
#ifdef X86_SSSE3 |
||||
uint32_t adler32_ssse3(uint32_t adler, const uint8_t *buf, size_t len); |
||||
uint8_t* chunkmemset_safe_ssse3(uint8_t *out, unsigned dist, unsigned len, unsigned left); |
||||
void inflate_fast_ssse3(PREFIX3(stream) *strm, uint32_t start); |
||||
#endif |
||||
|
||||
#ifdef X86_SSE42 |
||||
uint32_t adler32_fold_copy_sse42(uint32_t adler, uint8_t *dst, const uint8_t *src, size_t len); |
||||
#endif |
||||
|
||||
#ifdef X86_AVX2 |
||||
uint32_t adler32_avx2(uint32_t adler, const uint8_t *buf, size_t len); |
||||
uint32_t adler32_fold_copy_avx2(uint32_t adler, uint8_t *dst, const uint8_t *src, size_t len); |
||||
uint32_t chunksize_avx2(void); |
||||
uint8_t* chunkmemset_safe_avx2(uint8_t *out, unsigned dist, unsigned len, unsigned left); |
||||
|
||||
# ifdef HAVE_BUILTIN_CTZ |
||||
uint32_t compare256_avx2(const uint8_t *src0, const uint8_t *src1); |
||||
uint32_t longest_match_avx2(deflate_state *const s, Pos cur_match); |
||||
uint32_t longest_match_slow_avx2(deflate_state *const s, Pos cur_match); |
||||
void slide_hash_avx2(deflate_state *s); |
||||
# endif |
||||
void inflate_fast_avx2(PREFIX3(stream)* strm, uint32_t start); |
||||
#endif |
||||
#ifdef X86_AVX512 |
||||
uint32_t adler32_avx512(uint32_t adler, const uint8_t *buf, size_t len); |
||||
uint32_t adler32_fold_copy_avx512(uint32_t adler, uint8_t *dst, const uint8_t *src, size_t len); |
||||
#endif |
||||
#ifdef X86_AVX512VNNI |
||||
uint32_t adler32_avx512_vnni(uint32_t adler, const uint8_t *buf, size_t len); |
||||
uint32_t adler32_fold_copy_avx512_vnni(uint32_t adler, uint8_t *dst, const uint8_t *src, size_t len); |
||||
#endif |
||||
|
||||
#ifdef X86_PCLMULQDQ_CRC |
||||
uint32_t crc32_fold_pclmulqdq_reset(crc32_fold *crc); |
||||
void crc32_fold_pclmulqdq_copy(crc32_fold *crc, uint8_t *dst, const uint8_t *src, size_t len); |
||||
void crc32_fold_pclmulqdq(crc32_fold *crc, const uint8_t *src, size_t len, uint32_t init_crc); |
||||
uint32_t crc32_fold_pclmulqdq_final(crc32_fold *crc); |
||||
uint32_t crc32_pclmulqdq(uint32_t crc32, const uint8_t *buf, size_t len); |
||||
#endif |
||||
#ifdef X86_VPCLMULQDQ_CRC |
||||
uint32_t crc32_fold_vpclmulqdq_reset(crc32_fold *crc); |
||||
void crc32_fold_vpclmulqdq_copy(crc32_fold *crc, uint8_t *dst, const uint8_t *src, size_t len); |
||||
void crc32_fold_vpclmulqdq(crc32_fold *crc, const uint8_t *src, size_t len, uint32_t init_crc); |
||||
uint32_t crc32_fold_vpclmulqdq_final(crc32_fold *crc); |
||||
uint32_t crc32_vpclmulqdq(uint32_t crc32, const uint8_t *buf, size_t len); |
||||
#endif |
||||
|
||||
|
||||
#ifdef DISABLE_RUNTIME_CPU_DETECTION |
||||
// X86 - SSE2
|
||||
# if (defined(X86_SSE2) && defined(__SSE2__)) || defined(__x86_64__) || defined(_M_X64) || defined(X86_NOCHECK_SSE2) |
||||
# undef native_chunkmemset_safe |
||||
# define native_chunkmemset_safe chunkmemset_safe_sse2 |
||||
# undef native_chunksize |
||||
# define native_chunksize chunksize_sse2 |
||||
# undef native_inflate_fast |
||||
# define native_inflate_fast inflate_fast_sse2 |
||||
# undef native_slide_hash |
||||
# define native_slide_hash slide_hash_sse2 |
||||
# ifdef HAVE_BUILTIN_CTZ |
||||
# undef native_compare256 |
||||
# define native_compare256 compare256_sse2 |
||||
# undef native_longest_match |
||||
# define native_longest_match longest_match_sse2 |
||||
# undef native_longest_match_slow |
||||
# define native_longest_match_slow longest_match_slow_sse2 |
||||
# endif |
||||
#endif |
||||
// X86 - SSSE3
|
||||
# if defined(X86_SSSE3) && defined(__SSSE3__) |
||||
# undef native_adler32 |
||||
# define native_adler32 adler32_ssse3 |
||||
# undef native_chunkmemset_safe |
||||
# define native_chunkmemset_safe chunkmemset_safe_ssse3 |
||||
# undef native_inflate_fast |
||||
# define native_inflate_fast inflate_fast_ssse3 |
||||
# endif |
||||
// X86 - SSE4.2
|
||||
# if defined(X86_SSE42) && defined(__SSE4_2__) |
||||
# undef native_adler32_fold_copy |
||||
# define native_adler32_fold_copy adler32_fold_copy_sse42 |
||||
# endif |
||||
|
||||
// X86 - PCLMUL
|
||||
#if defined(X86_PCLMULQDQ_CRC) && defined(__PCLMUL__) |
||||
# undef native_crc32 |
||||
# define native_crc32 crc32_pclmulqdq |
||||
# undef native_crc32_fold |
||||
# define native_crc32_fold crc32_fold_pclmulqdq |
||||
# undef native_crc32_fold_copy |
||||
# define native_crc32_fold_copy crc32_fold_pclmulqdq_copy |
||||
# undef native_crc32_fold_final |
||||
# define native_crc32_fold_final crc32_fold_pclmulqdq_final |
||||
# undef native_crc32_fold_reset |
||||
# define native_crc32_fold_reset crc32_fold_pclmulqdq_reset |
||||
#endif |
||||
// X86 - AVX
|
||||
# if defined(X86_AVX2) && defined(__AVX2__) |
||||
# undef native_adler32 |
||||
# define native_adler32 adler32_avx2 |
||||
# undef native_adler32_fold_copy |
||||
# define native_adler32_fold_copy adler32_fold_copy_avx2 |
||||
# undef native_chunkmemset_safe |
||||
# define native_chunkmemset_safe chunkmemset_safe_avx2 |
||||
# undef native_chunksize |
||||
# define native_chunksize chunksize_avx2 |
||||
# undef native_inflate_fast |
||||
# define native_inflate_fast inflate_fast_avx2 |
||||
# undef native_slide_hash |
||||
# define native_slide_hash slide_hash_avx2 |
||||
# ifdef HAVE_BUILTIN_CTZ |
||||
# undef native_compare256 |
||||
# define native_compare256 compare256_avx2 |
||||
# undef native_longest_match |
||||
# define native_longest_match longest_match_avx2 |
||||
# undef native_longest_match_slow |
||||
# define native_longest_match_slow longest_match_slow_avx2 |
||||
# endif |
||||
# endif |
||||
|
||||
// X86 - AVX512 (F,DQ,BW,Vl)
|
||||
# if defined(X86_AVX512) && defined(__AVX512F__) && defined(__AVX512DQ__) && defined(__AVX512BW__) && defined(__AVX512VL__) |
||||
# undef native_adler32 |
||||
# define native_adler32 adler32_avx512 |
||||
# undef native_adler32_fold_copy |
||||
# define native_adler32_fold_copy adler32_fold_copy_avx512 |
||||
// X86 - AVX512 (VNNI)
|
||||
# if defined(X86_AVX512VNNI) && defined(__AVX512VNNI__) |
||||
# undef native_adler32 |
||||
# define native_adler32 adler32_avx512_vnni |
||||
# undef native_adler32_fold_copy |
||||
# define native_adler32_fold_copy adler32_fold_copy_avx512_vnni |
||||
# endif |
||||
// X86 - VPCLMULQDQ
|
||||
# if defined(__PCLMUL__) && defined(__AVX512F__) && defined(__VPCLMULQDQ__) |
||||
# undef native_crc32 |
||||
# define native_crc32 crc32_vpclmulqdq |
||||
# undef native_crc32_fold |
||||
# define native_crc32_fold crc32_fold_vpclmulqdq |
||||
# undef native_crc32_fold_copy |
||||
# define native_crc32_fold_copy crc32_fold_vpclmulqdq_copy |
||||
# undef native_crc32_fold_final |
||||
# define native_crc32_fold_final crc32_fold_vpclmulqdq_final |
||||
# undef native_crc32_fold_reset |
||||
# define native_crc32_fold_reset crc32_fold_vpclmulqdq_reset |
||||
# endif |
||||
# endif |
||||
#endif |
||||
|
||||
#endif /* X86_FUNCTIONS_H_ */ |
@ -0,0 +1,29 @@ |
||||
/* arch_functions.h -- Arch-specific function prototypes.
|
||||
* Copyright (C) 2017 Hans Kristian Rosbach |
||||
* For conditions of distribution and use, see copyright notice in zlib.h |
||||
*/ |
||||
|
||||
#ifndef CPU_FUNCTIONS_H_ |
||||
#define CPU_FUNCTIONS_H_ |
||||
|
||||
#include "zbuild.h" |
||||
#include "zutil.h" |
||||
#include "crc32.h" |
||||
#include "deflate.h" |
||||
#include "fallback_builtins.h" |
||||
|
||||
#include "arch/generic/generic_functions.h" |
||||
|
||||
#if defined(X86_FEATURES) |
||||
# include "arch/x86/x86_functions.h" |
||||
#elif defined(ARM_FEATURES) |
||||
# include "arch/arm/arm_functions.h" |
||||
#elif defined(PPC_FEATURES) || defined(POWER_FEATURES) |
||||
# include "arch/power/power_functions.h" |
||||
#elif defined(S390_FEATURES) |
||||
# include "arch/s390/s390_functions.h" |
||||
#elif defined(RISCV_FEATURES) |
||||
# include "arch/riscv/riscv_functions.h" |
||||
#endif |
||||
|
||||
#endif |
@ -0,0 +1,115 @@ |
||||
// archdetect.c -- Detect compiler architecture and raise preprocessor error
|
||||
// containing a simple arch identifier.
|
||||
// Copyright (C) 2019 Hans Kristian Rosbach
|
||||
// Licensed under the Zlib license, see LICENSE.md for details
|
||||
|
||||
// x86_64
|
||||
#if defined(__x86_64__) || defined(_M_X64) |
||||
#error archfound x86_64 |
||||
|
||||
// x86
|
||||
#elif defined(__i386) || defined(_M_IX86) |
||||
#error archfound i686 |
||||
|
||||
// ARM
|
||||
#elif defined(__aarch64__) || defined(__arm64__) || defined(_M_ARM64) || defined(_M_ARM64EC) |
||||
#error archfound aarch64 |
||||
#elif defined(__arm__) || defined(__arm) || defined(_M_ARM) || defined(__TARGET_ARCH_ARM) |
||||
#if defined(__ARM64_ARCH_8__) || defined(__ARMv8__) || defined(__ARMv8_A__) |
||||
#error archfound armv8 |
||||
#elif defined(__ARM_ARCH_7__) || defined(__ARM_ARCH_7A__) || defined(__ARM_ARCH_7R__) || defined(__ARM_ARCH_7M__) |
||||
#error archfound armv7 |
||||
#elif defined(__ARM_ARCH_6__) || defined(__ARM_ARCH_6J__) || defined(__ARM_ARCH_6T2__) || defined(__ARM_ARCH_6Z__) || defined(__ARM_ARCH_6K__) || defined(__ARM_ARCH_6ZK__) || defined(__ARM_ARCH_6M__) |
||||
#error archfound armv6 |
||||
#elif defined(__ARM_ARCH_5T__) || defined(__ARM_ARCH_5TE__) || defined(__ARM_ARCH_5TEJ__) |
||||
#error archfound armv5 |
||||
#elif defined(__ARM_ARCH_4T__) || defined(__TARGET_ARCH_5E__) |
||||
#error archfound armv4 |
||||
#elif defined(__ARM_ARCH_3__) || defined(__TARGET_ARCH_3M__) |
||||
#error archfound armv3 |
||||
#elif defined(__ARM_ARCH_2__) |
||||
#error archfound armv2 |
||||
#endif |
||||
|
||||
// PowerPC
|
||||
#elif defined(__powerpc__) || defined(_ppc__) || defined(__PPC__) |
||||
#if defined(__64BIT__) || defined(__powerpc64__) || defined(__ppc64__) |
||||
#if __BYTE_ORDER__ == __ORDER_LITTLE_ENDIAN__ |
||||
#error archfound powerpc64le |
||||
#else |
||||
#error archfound powerpc64 |
||||
#endif |
||||
#else |
||||
#error archfound powerpc |
||||
#endif |
||||
|
||||
// --------------- Less common architectures alphabetically below ---------------
|
||||
|
||||
// ALPHA
|
||||
#elif defined(__alpha__) || defined(__alpha) |
||||
#error archfound alpha |
||||
|
||||
// Blackfin
|
||||
#elif defined(__BFIN__) |
||||
#error archfound blackfin |
||||
|
||||
// Itanium
|
||||
#elif defined(__ia64) || defined(_M_IA64) |
||||
#error archfound ia64 |
||||
|
||||
// MIPS
|
||||
#elif defined(__mips__) || defined(__mips) |
||||
#error archfound mips |
||||
|
||||
// Motorola 68000-series
|
||||
#elif defined(__m68k__) |
||||
#error archfound m68k |
||||
|
||||
// SuperH
|
||||
#elif defined(__sh__) |
||||
#error archfound sh |
||||
|
||||
// SPARC
|
||||
#elif defined(__sparc__) || defined(__sparc) |
||||
#if defined(__sparcv9) || defined(__sparc_v9__) |
||||
#error archfound sparc9 |
||||
#elif defined(__sparcv8) || defined(__sparc_v8__) |
||||
#error archfound sparc8 |
||||
#endif |
||||
|
||||
// SystemZ
|
||||
#elif defined(__370__) |
||||
#error archfound s370 |
||||
#elif defined(__s390__) |
||||
#error archfound s390 |
||||
#elif defined(__s390x) || defined(__zarch__) |
||||
#error archfound s390x |
||||
|
||||
// PARISC
|
||||
#elif defined(__hppa__) |
||||
#error archfound parisc |
||||
|
||||
// RS-6000
|
||||
#elif defined(__THW_RS6000) |
||||
#error archfound rs6000 |
||||
|
||||
// RISC-V
|
||||
#elif defined(__riscv) |
||||
#if __riscv_xlen == 64 |
||||
#error archfound riscv64 |
||||
#elif __riscv_xlen == 32 |
||||
#error archfound riscv32 |
||||
#endif |
||||
|
||||
// LOONGARCH
|
||||
#elif defined(__loongarch_lp64) |
||||
#error archfound loongarch64 |
||||
|
||||
// Emscripten (WebAssembly)
|
||||
#elif defined(__EMSCRIPTEN__) |
||||
#error archfound wasm32 |
||||
|
||||
// return 'unrecognized' if we do not know what architecture this is
|
||||
#else |
||||
#error archfound unrecognized |
||||
#endif |
@ -0,0 +1,104 @@ |
||||
# detect-arch.cmake -- Detect compiler architecture and set ARCH and BASEARCH |
||||
# Copyright (C) 2019 Hans Kristian Rosbach |
||||
# Licensed under the Zlib license, see LICENSE.md for details |
||||
set(ARCHDETECT_FOUND TRUE) |
||||
|
||||
if(CMAKE_OSX_ARCHITECTURES) |
||||
# If multiple architectures are requested (universal build), pick only the first |
||||
list(GET CMAKE_OSX_ARCHITECTURES 0 ARCH) |
||||
elseif(MSVC) |
||||
if("${MSVC_C_ARCHITECTURE_ID}" STREQUAL "X86") |
||||
set(ARCH "i686") |
||||
elseif("${MSVC_C_ARCHITECTURE_ID}" STREQUAL "x64") |
||||
set(ARCH "x86_64") |
||||
elseif("${MSVC_C_ARCHITECTURE_ID}" STREQUAL "ARM" OR "${MSVC_C_ARCHITECTURE_ID}" STREQUAL "ARMV7") |
||||
set(ARCH "arm") |
||||
elseif ("${MSVC_C_ARCHITECTURE_ID}" STREQUAL "ARM64" OR "${MSVC_C_ARCHITECTURE_ID}" STREQUAL "ARM64EC") |
||||
set(ARCH "aarch64") |
||||
endif() |
||||
elseif(EMSCRIPTEN) |
||||
set(ARCH "wasm32") |
||||
elseif(CMAKE_CROSSCOMPILING) |
||||
set(ARCH ${CMAKE_C_COMPILER_TARGET}) |
||||
else() |
||||
# Let preprocessor parse archdetect.c and raise an error containing the arch identifier |
||||
enable_language(C) |
||||
try_run( |
||||
run_result_unused |
||||
compile_result_unused |
||||
${CMAKE_CURRENT_BINARY_DIR} |
||||
${CMAKE_CURRENT_LIST_DIR}/detect-arch.c |
||||
COMPILE_OUTPUT_VARIABLE RAWOUTPUT |
||||
CMAKE_FLAGS CMAKE_OSX_ARCHITECTURES=${CMAKE_OSX_ARCHITECTURES} |
||||
) |
||||
|
||||
# Find basearch tag, and extract the arch word into BASEARCH variable |
||||
string(REGEX REPLACE ".*archfound ([a-zA-Z0-9_]+).*" "\\1" ARCH "${RAWOUTPUT}") |
||||
if(NOT ARCH) |
||||
set(ARCH unknown) |
||||
endif() |
||||
endif() |
||||
|
||||
# Make sure we have ARCH set |
||||
if(NOT ARCH OR ARCH STREQUAL "unknown") |
||||
set(ARCH ${CMAKE_SYSTEM_PROCESSOR}) |
||||
message(STATUS "Arch not recognized, falling back to cmake arch: '${ARCH}'") |
||||
else() |
||||
message(STATUS "Arch detected: '${ARCH}'") |
||||
endif() |
||||
|
||||
# Base arch detection |
||||
if("${ARCH}" MATCHES "(x86_64|AMD64|i[3-6]86)") |
||||
set(BASEARCH "x86") |
||||
set(BASEARCH_X86_FOUND TRUE) |
||||
elseif("${ARCH}" MATCHES "(arm(v[0-9])?|aarch64|cortex)") |
||||
set(BASEARCH "arm") |
||||
set(BASEARCH_ARM_FOUND TRUE) |
||||
elseif("${ARCH}" MATCHES "ppc(64(le)?)?|powerpc(64(le)?)?") |
||||
set(BASEARCH "ppc") |
||||
set(BASEARCH_PPC_FOUND TRUE) |
||||
elseif("${ARCH}" MATCHES "alpha") |
||||
set(BASEARCH "alpha") |
||||
set(BASEARCH_ALPHA_FOUND TRUE) |
||||
elseif("${ARCH}" MATCHES "blackfin") |
||||
set(BASEARCH "blackfin") |
||||
set(BASEARCH_BLACKFIN_FOUND TRUE) |
||||
elseif("${ARCH}" MATCHES "ia64") |
||||
set(BASEARCH "ia64") |
||||
set(BASEARCH_IA64_FOUND TRUE) |
||||
elseif("${ARCH}" MATCHES "mips") |
||||
set(BASEARCH "mips") |
||||
set(BASEARCH_MIPS_FOUND TRUE) |
||||
elseif("${ARCH}" MATCHES "m68k") |
||||
set(BASEARCH "m68k") |
||||
set(BASEARCH_M68K_FOUND TRUE) |
||||
elseif("${ARCH}" MATCHES "sh") |
||||
set(BASEARCH "sh") |
||||
set(BASEARCH_SH_FOUND TRUE) |
||||
elseif("${ARCH}" MATCHES "sparc[89]?") |
||||
set(BASEARCH "sparc") |
||||
set(BASEARCH_SPARC_FOUND TRUE) |
||||
elseif("${ARCH}" MATCHES "s3[679]0x?") |
||||
set(BASEARCH "s360") |
||||
set(BASEARCH_S360_FOUND TRUE) |
||||
elseif("${ARCH}" MATCHES "parisc") |
||||
set(BASEARCH "parisc") |
||||
set(BASEARCH_PARISC_FOUND TRUE) |
||||
elseif("${ARCH}" MATCHES "rs6000") |
||||
set(BASEARCH "rs6000") |
||||
set(BASEARCH_RS6000_FOUND TRUE) |
||||
elseif("${ARCH}" MATCHES "riscv(32|64)") |
||||
set(BASEARCH "riscv") |
||||
set(BASEARCH_RISCV_FOUND TRUE) |
||||
elseif("${ARCH}" MATCHES "loongarch64") |
||||
set(BASEARCH "loongarch") |
||||
set(BASEARCH_LOONGARCH_FOUND TRUE) |
||||
elseif("${ARCH}" MATCHES "wasm32") |
||||
set(BASEARCH "wasm32") |
||||
set(BASEARCH_WASM32_FOUND TRUE) |
||||
else() |
||||
set(BASEARCH "x86") |
||||
set(BASEARCH_X86_FOUND TRUE) |
||||
message(STATUS "Basearch '${ARCH}' not recognized, defaulting to 'x86'.") |
||||
endif() |
||||
message(STATUS "Basearch of '${ARCH}' has been detected as: '${BASEARCH}'") |
@ -0,0 +1,46 @@ |
||||
# detect-coverage.cmake -- Detect supported compiler coverage flags |
||||
# Licensed under the Zlib license, see LICENSE.md for details |
||||
|
||||
macro(add_code_coverage) |
||||
# Check for -coverage flag support for Clang/GCC |
||||
if(CMAKE_VERSION VERSION_LESS 3.14) |
||||
set(CMAKE_REQUIRED_LIBRARIES -lgcov) |
||||
else() |
||||
set(CMAKE_REQUIRED_LINK_OPTIONS -coverage) |
||||
endif() |
||||
check_c_compiler_flag(-coverage HAVE_COVERAGE) |
||||
set(CMAKE_REQUIRED_LIBRARIES) |
||||
set(CMAKE_REQUIRED_LINK_OPTIONS) |
||||
|
||||
if(HAVE_COVERAGE) |
||||
add_compile_options(-coverage) |
||||
add_link_options(-coverage) |
||||
message(STATUS "Code coverage enabled using: -coverage") |
||||
else() |
||||
# Some versions of GCC don't support -coverage shorthand |
||||
if(CMAKE_VERSION VERSION_LESS 3.14) |
||||
set(CMAKE_REQUIRED_LIBRARIES -lgcov) |
||||
else() |
||||
set(CMAKE_REQUIRED_LINK_OPTIONS -lgcov -fprofile-arcs) |
||||
endif() |
||||
check_c_compiler_flag("-ftest-coverage -fprofile-arcs -fprofile-values" HAVE_TEST_COVERAGE) |
||||
set(CMAKE_REQUIRED_LIBRARIES) |
||||
set(CMAKE_REQUIRED_LINK_OPTIONS) |
||||
|
||||
if(HAVE_TEST_COVERAGE) |
||||
add_compile_options(-ftest-coverage -fprofile-arcs -fprofile-values) |
||||
add_link_options(-lgcov -fprofile-arcs) |
||||
message(STATUS "Code coverage enabled using: -ftest-coverage") |
||||
else() |
||||
message(WARNING "Compiler does not support code coverage") |
||||
set(WITH_CODE_COVERAGE OFF) |
||||
endif() |
||||
endif() |
||||
|
||||
# Set optimization level to zero for code coverage builds |
||||
if (WITH_CODE_COVERAGE) |
||||
# Use CMake compiler flag variables due to add_compile_options failure on Windows GCC |
||||
set(CMAKE_C_FLAGS "-O0 ${CMAKE_C_FLAGS}") |
||||
set(CMAKE_CXX_FLAGS "-O0 ${CMAKE_CXX_FLAGS}") |
||||
endif() |
||||
endmacro() |
@ -0,0 +1,43 @@ |
||||
# detect-install-dirs.cmake -- Detect install directory parameters |
||||
# Copyright (C) 2021 Hans Kristian Rosbach |
||||
# Licensed under the Zlib license, see LICENSE.md for details |
||||
|
||||
# Determine installation directory for executables |
||||
if (DEFINED BIN_INSTALL_DIR) |
||||
set(BIN_INSTALL_DIR "${BIN_INSTALL_DIR}" CACHE PATH "Installation directory for executables (Deprecated)" FORCE) |
||||
set(CMAKE_INSTALL_BINDIR "${BIN_INSTALL_DIR}") |
||||
elseif (DEFINED INSTALL_BIN_DIR) |
||||
set(CMAKE_INSTALL_BINDIR "${INSTALL_BIN_DIR}") |
||||
endif() |
||||
|
||||
# Determine installation directory for libraries |
||||
if (DEFINED LIB_INSTALL_DIR) |
||||
set(LIB_INSTALL_DIR "${LIB_INSTALL_DIR}" CACHE PATH "Installation directory for libraries (Deprecated)" FORCE) |
||||
set(CMAKE_INSTALL_LIBDIR "${LIB_INSTALL_DIR}") |
||||
elseif (DEFINED INSTALL_LIB_DIR) |
||||
set(CMAKE_INSTALL_LIBDIR "${INSTALL_LIB_DIR}") |
||||
endif() |
||||
|
||||
# Determine installation directory for include files |
||||
if (DEFINED INC_INSTALL_DIR) |
||||
set(INC_INSTALL_DIR "${INC_INSTALL_DIR}" CACHE PATH "Installation directory for headers (Deprecated)" FORCE) |
||||
set(CMAKE_INSTALL_INCLUDEDIR "${INC_INSTALL_DIR}") |
||||
elseif (DEFINED INSTALL_INC_DIR) |
||||
set(CMAKE_INSTALL_INCLUDEDIR "${INSTALL_INC_DIR}") |
||||
endif() |
||||
|
||||
# Define GNU standard installation directories |
||||
include(GNUInstallDirs) |
||||
|
||||
# Determine installation directory for pkgconfig files |
||||
if (DEFINED PKGCONFIG_INSTALL_DIR) |
||||
set(PKGCONFIG_INSTALL_DIR "${PKGCONFIG_INSTALL_DIR}" CACHE PATH "Installation directory for pkgconfig (.pc) files" FORCE) |
||||
elseif (DEFINED INSTALL_PKGCONFIG_DIR) |
||||
set(PKGCONFIG_INSTALL_DIR "${INSTALL_PKGCONFIG_DIR}" CACHE PATH "Installation directory for pkgconfig (.pc) files" FORCE) |
||||
elseif (DEFINED CMAKE_INSTALL_PKGCONFIGDIR) |
||||
set(PKGCONFIG_INSTALL_DIR "${CMAKE_INSTALL_PKGCONFIGDIR}" CACHE PATH "Installation directory for pkgconfig (.pc) files" FORCE) |
||||
elseif (DEFINED CMAKE_INSTALL_FULL_PKGCONFIGDIR) |
||||
set(PKGCONFIG_INSTALL_DIR "${CMAKE_INSTALL_FULL_PKGCONFIGDIR}" CACHE PATH "Installation directory for pkgconfig (.pc) files" FORCE) |
||||
else() |
||||
set(PKGCONFIG_INSTALL_DIR "${CMAKE_INSTALL_LIBDIR}/pkgconfig" CACHE PATH "Installation directory for pkgconfig (.pc) files") |
||||
endif() |
@ -0,0 +1,166 @@ |
||||
# detect-sanitizer.cmake -- Detect supported compiler sanitizer flags |
||||
# Licensed under the Zlib license, see LICENSE.md for details |
||||
|
||||
macro(add_common_sanitizer_flags) |
||||
if(CMAKE_C_COMPILER_ID MATCHES "GNU" OR CMAKE_C_COMPILER_ID MATCHES "Clang") |
||||
add_compile_options(-g3) |
||||
endif() |
||||
check_c_compiler_flag(-fno-omit-frame-pointer HAVE_NO_OMIT_FRAME_POINTER) |
||||
if(HAVE_NO_OMIT_FRAME_POINTER) |
||||
add_compile_options(-fno-omit-frame-pointer) |
||||
add_link_options(-fno-omit-frame-pointer) |
||||
endif() |
||||
check_c_compiler_flag(-fno-optimize-sibling-calls HAVE_NO_OPTIMIZE_SIBLING_CALLS) |
||||
if(HAVE_NO_OPTIMIZE_SIBLING_CALLS) |
||||
add_compile_options(-fno-optimize-sibling-calls) |
||||
add_link_options(-fno-optimize-sibling-calls) |
||||
endif() |
||||
endmacro() |
||||
|
||||
macro(check_sanitizer_support known_checks supported_checks) |
||||
set(available_checks "") |
||||
|
||||
# Build list of supported sanitizer flags by incrementally trying compilation with |
||||
# known sanitizer checks |
||||
|
||||
foreach(check ${known_checks}) |
||||
if(available_checks STREQUAL "") |
||||
set(compile_checks "${check}") |
||||
else() |
||||
set(compile_checks "${available_checks},${check}") |
||||
endif() |
||||
|
||||
set(CMAKE_REQUIRED_FLAGS -fsanitize=${compile_checks}) |
||||
|
||||
check_c_source_compiles("int main() { return 0; }" HAVE_SANITIZER_${check} |
||||
FAIL_REGEX "not supported|unrecognized command|unknown option") |
||||
|
||||
set(CMAKE_REQUIRED_FLAGS) |
||||
|
||||
if(HAVE_SANITIZER_${check}) |
||||
set(available_checks ${compile_checks}) |
||||
endif() |
||||
endforeach() |
||||
|
||||
set(${supported_checks} ${available_checks}) |
||||
endmacro() |
||||
|
||||
macro(add_address_sanitizer) |
||||
set(known_checks |
||||
address |
||||
pointer-compare |
||||
pointer-subtract |
||||
) |
||||
|
||||
check_sanitizer_support("${known_checks}" supported_checks) |
||||
if(NOT ${supported_checks} STREQUAL "") |
||||
message(STATUS "Address sanitizer is enabled: ${supported_checks}") |
||||
add_compile_options(-fsanitize=${supported_checks}) |
||||
add_link_options(-fsanitize=${supported_checks}) |
||||
add_common_sanitizer_flags() |
||||
else() |
||||
message(STATUS "Address sanitizer is not supported") |
||||
endif() |
||||
|
||||
if(CMAKE_CROSSCOMPILING_EMULATOR) |
||||
# Only check for leak sanitizer if not cross-compiling due to qemu crash |
||||
message(WARNING "Leak sanitizer is not supported when cross compiling") |
||||
else() |
||||
# Leak sanitizer requires address sanitizer |
||||
check_sanitizer_support("leak" supported_checks) |
||||
if(NOT ${supported_checks} STREQUAL "") |
||||
message(STATUS "Leak sanitizer is enabled: ${supported_checks}") |
||||
add_compile_options(-fsanitize=${supported_checks}) |
||||
add_link_options(-fsanitize=${supported_checks}) |
||||
add_common_sanitizer_flags() |
||||
else() |
||||
message(STATUS "Leak sanitizer is not supported") |
||||
endif() |
||||
endif() |
||||
endmacro() |
||||
|
||||
macro(add_memory_sanitizer) |
||||
check_sanitizer_support("memory" supported_checks) |
||||
if(NOT ${supported_checks} STREQUAL "") |
||||
message(STATUS "Memory sanitizer is enabled: ${supported_checks}") |
||||
add_compile_options(-fsanitize=${supported_checks}) |
||||
add_link_options(-fsanitize=${supported_checks}) |
||||
add_common_sanitizer_flags() |
||||
|
||||
check_c_compiler_flag(-fsanitize-memory-track-origins HAVE_MEMORY_TRACK_ORIGINS) |
||||
if(HAVE_MEMORY_TRACK_ORIGINS) |
||||
add_compile_options(-fsanitize-memory-track-origins) |
||||
add_link_options(-fsanitize-memory-track-origins) |
||||
endif() |
||||
else() |
||||
message(STATUS "Memory sanitizer is not supported") |
||||
endif() |
||||
endmacro() |
||||
|
||||
macro(add_thread_sanitizer) |
||||
check_sanitizer_support("thread" supported_checks) |
||||
if(NOT ${supported_checks} STREQUAL "") |
||||
message(STATUS "Thread sanitizer is enabled: ${supported_checks}") |
||||
add_compile_options(-fsanitize=${supported_checks}) |
||||
add_link_options(-fsanitize=${supported_checks}) |
||||
add_common_sanitizer_flags() |
||||
else() |
||||
message(STATUS "Thread sanitizer is not supported") |
||||
endif() |
||||
endmacro() |
||||
|
||||
macro(add_undefined_sanitizer) |
||||
set(known_checks |
||||
array-bounds |
||||
bool |
||||
bounds |
||||
builtin |
||||
enum |
||||
float-cast-overflow |
||||
float-divide-by-zero |
||||
function |
||||
integer-divide-by-zero |
||||
local-bounds |
||||
null |
||||
nonnull-attribute |
||||
pointer-overflow |
||||
return |
||||
returns-nonnull-attribute |
||||
shift |
||||
shift-base |
||||
shift-exponent |
||||
signed-integer-overflow |
||||
undefined |
||||
unsigned-integer-overflow |
||||
unsigned-shift-base |
||||
vla-bound |
||||
vptr |
||||
) |
||||
|
||||
# Only check for alignment sanitizer flag if unaligned access is not supported |
||||
if(NOT WITH_UNALIGNED) |
||||
list(APPEND known_checks alignment) |
||||
endif() |
||||
# Object size sanitizer has no effect at -O0 and produces compiler warning if enabled |
||||
if(NOT CMAKE_C_FLAGS MATCHES "-O0") |
||||
list(APPEND known_checks object-size) |
||||
endif() |
||||
|
||||
check_sanitizer_support("${known_checks}" supported_checks) |
||||
|
||||
if(NOT ${supported_checks} STREQUAL "") |
||||
message(STATUS "Undefined behavior sanitizer is enabled: ${supported_checks}") |
||||
add_compile_options(-fsanitize=${supported_checks}) |
||||
add_link_options(-fsanitize=${supported_checks}) |
||||
|
||||
# Group sanitizer flag -fsanitize=undefined will automatically add alignment, even if |
||||
# it is not in our sanitize flag list, so we need to explicitly disable alignment sanitizing. |
||||
if(WITH_UNALIGNED) |
||||
add_compile_options(-fno-sanitize=alignment) |
||||
endif() |
||||
|
||||
add_common_sanitizer_flags() |
||||
else() |
||||
message(STATUS "Undefined behavior sanitizer is not supported") |
||||
endif() |
||||
endmacro() |
@ -0,0 +1,42 @@ |
||||
/* crc32.c -- compute the CRC-32 of a data stream
|
||||
* Copyright (C) 1995-2022 Mark Adler |
||||
* For conditions of distribution and use, see copyright notice in zlib.h |
||||
* |
||||
* This interleaved implementation of a CRC makes use of pipelined multiple |
||||
* arithmetic-logic units, commonly found in modern CPU cores. It is due to |
||||
* Kadatch and Jenkins (2010). See doc/crc-doc.1.0.pdf in this distribution. |
||||
*/ |
||||
|
||||
#include "zbuild.h" |
||||
#include "functable.h" |
||||
#include "crc32_braid_tbl.h" |
||||
|
||||
/* ========================================================================= */ |
||||
|
||||
const uint32_t * Z_EXPORT PREFIX(get_crc_table)(void) { |
||||
return (const uint32_t *)crc_table; |
||||
} |
||||
|
||||
#ifdef ZLIB_COMPAT |
||||
unsigned long Z_EXPORT PREFIX(crc32_z)(unsigned long crc, const unsigned char *buf, size_t len) { |
||||
if (buf == NULL) return 0; |
||||
|
||||
return (unsigned long)FUNCTABLE_CALL(crc32)((uint32_t)crc, buf, len); |
||||
} |
||||
#else |
||||
uint32_t Z_EXPORT PREFIX(crc32_z)(uint32_t crc, const unsigned char *buf, size_t len) { |
||||
if (buf == NULL) return 0; |
||||
|
||||
return FUNCTABLE_CALL(crc32)(crc, buf, len); |
||||
} |
||||
#endif |
||||
|
||||
#ifdef ZLIB_COMPAT |
||||
unsigned long Z_EXPORT PREFIX(crc32)(unsigned long crc, const unsigned char *buf, unsigned int len) { |
||||
return (unsigned long)PREFIX(crc32_z)((uint32_t)crc, buf, len); |
||||
} |
||||
#else |
||||
uint32_t Z_EXPORT PREFIX(crc32)(uint32_t crc, const unsigned char *buf, uint32_t len) { |
||||
return PREFIX(crc32_z)(crc, buf, len); |
||||
} |
||||
#endif |
@ -0,0 +1,16 @@ |
||||
/* crc32.h -- crc32 folding interface
|
||||
* Copyright (C) 2021 Nathan Moinvaziri |
||||
* For conditions of distribution and use, see copyright notice in zlib.h |
||||
*/ |
||||
#ifndef CRC32_H_ |
||||
#define CRC32_H_ |
||||
|
||||
#define CRC32_FOLD_BUFFER_SIZE (16 * 4) |
||||
/* sizeof(__m128i) * (4 folds) */ |
||||
|
||||
typedef struct crc32_fold_s { |
||||
uint8_t fold[CRC32_FOLD_BUFFER_SIZE]; |
||||
uint32_t value; |
||||
} crc32_fold; |
||||
|
||||
#endif |
@ -1,21 +0,0 @@ |
||||
/* crc32_fold.h -- crc32 folding interface
|
||||
* Copyright (C) 2021 Nathan Moinvaziri |
||||
* For conditions of distribution and use, see copyright notice in zlib.h |
||||
*/ |
||||
#ifndef CRC32_FOLD_H_ |
||||
#define CRC32_FOLD_H_ |
||||
|
||||
#define CRC32_FOLD_BUFFER_SIZE (16 * 4) |
||||
/* sizeof(__m128i) * (4 folds) */ |
||||
|
||||
typedef struct crc32_fold_s { |
||||
uint8_t fold[CRC32_FOLD_BUFFER_SIZE]; |
||||
uint32_t value; |
||||
} crc32_fold; |
||||
|
||||
Z_INTERNAL uint32_t crc32_fold_reset_c(crc32_fold *crc); |
||||
Z_INTERNAL void crc32_fold_copy_c(crc32_fold *crc, uint8_t *dst, const uint8_t *src, size_t len); |
||||
Z_INTERNAL void crc32_fold_c(crc32_fold *crc, const uint8_t *src, size_t len, uint32_t init_crc); |
||||
Z_INTERNAL uint32_t crc32_fold_final_c(crc32_fold *crc); |
||||
|
||||
#endif |
Some files were not shown because too many files have changed in this diff Show More
Loading…
Reference in new issue