mirror of https://github.com/opencv/opencv.git
parent
d2e169929c
commit
0831bd3990
30 changed files with 17693 additions and 9932 deletions
File diff suppressed because it is too large
Load Diff
@ -0,0 +1,134 @@ |
||||
|
||||
/* arm_init.c - NEON optimised filter functions
|
||||
* |
||||
* Copyright (c) 2014 Glenn Randers-Pehrson |
||||
* Written by Mans Rullgard, 2011. |
||||
* Last changed in libpng 1.6.16 [December 22, 2014] |
||||
* |
||||
* This code is released under the libpng license. |
||||
* For conditions of distribution and use, see the disclaimer |
||||
* and license in png.h |
||||
*/ |
||||
/* Below, after checking __linux__, various non-C90 POSIX 1003.1 functions are
|
||||
* called. |
||||
*/ |
||||
#define _POSIX_SOURCE 1 |
||||
|
||||
#include "../pngpriv.h" |
||||
|
||||
#ifdef PNG_READ_SUPPORTED |
||||
|
||||
#if PNG_ARM_NEON_OPT > 0 |
||||
#ifdef PNG_ARM_NEON_CHECK_SUPPORTED /* Do run-time checks */ |
||||
/* WARNING: it is strongly recommended that you do not build libpng with
|
||||
* run-time checks for CPU features if at all possible. In the case of the ARM |
||||
* NEON instructions there is no processor-specific way of detecting the |
||||
* presence of the required support, therefore run-time detection is extremely |
||||
* OS specific. |
||||
* |
||||
* You may set the macro PNG_ARM_NEON_FILE to the name of a file containing |
||||
* a fragment of C source code which defines the png_have_neon function. There |
||||
* are a number of implementations in contrib/arm-neon, but the only one that |
||||
* has partial support is contrib/arm-neon/linux.c - a generic Linux |
||||
* implementation which reads /proc/cpuinfo. |
||||
*/ |
||||
#ifndef PNG_ARM_NEON_FILE |
||||
# ifdef __linux__ |
||||
# define PNG_ARM_NEON_FILE "contrib/arm-neon/linux.c" |
||||
# endif |
||||
#endif |
||||
|
||||
#ifdef PNG_ARM_NEON_FILE |
||||
|
||||
#include <signal.h> /* for sig_atomic_t */ |
||||
static int png_have_neon(png_structp png_ptr); |
||||
#include PNG_ARM_NEON_FILE |
||||
|
||||
#else /* PNG_ARM_NEON_FILE */ |
||||
# error "PNG_ARM_NEON_FILE undefined: no support for run-time ARM NEON checks" |
||||
#endif /* PNG_ARM_NEON_FILE */ |
||||
#endif /* PNG_ARM_NEON_CHECK_SUPPORTED */ |
||||
|
||||
#ifndef PNG_ALIGNED_MEMORY_SUPPORTED |
||||
# error "ALIGNED_MEMORY is required; set: -DPNG_ALIGNED_MEMORY_SUPPORTED" |
||||
#endif |
||||
|
||||
void |
||||
png_init_filter_functions_neon(png_structp pp, unsigned int bpp) |
||||
{ |
||||
/* The switch statement is compiled in for ARM_NEON_API, the call to
|
||||
* png_have_neon is compiled in for ARM_NEON_CHECK. If both are defined |
||||
* the check is only performed if the API has not set the NEON option on |
||||
* or off explicitly. In this case the check controls what happens. |
||||
* |
||||
* If the CHECK is not compiled in and the option is UNSET the behavior prior |
||||
* to 1.6.7 was to use the NEON code - this was a bug caused by having the |
||||
* wrong order of the 'ON' and 'default' cases. UNSET now defaults to OFF, |
||||
* as documented in png.h |
||||
*/ |
||||
#ifdef PNG_ARM_NEON_API_SUPPORTED |
||||
switch ((pp->options >> PNG_ARM_NEON) & 3) |
||||
{ |
||||
case PNG_OPTION_UNSET: |
||||
/* Allow the run-time check to execute if it has been enabled -
|
||||
* thus both API and CHECK can be turned on. If it isn't supported |
||||
* this case will fall through to the 'default' below, which just |
||||
* returns. |
||||
*/ |
||||
#endif /* PNG_ARM_NEON_API_SUPPORTED */ |
||||
#ifdef PNG_ARM_NEON_CHECK_SUPPORTED |
||||
{ |
||||
static volatile sig_atomic_t no_neon = -1; /* not checked */ |
||||
|
||||
if (no_neon < 0) |
||||
no_neon = !png_have_neon(pp); |
||||
|
||||
if (no_neon) |
||||
return; |
||||
} |
||||
#ifdef PNG_ARM_NEON_API_SUPPORTED |
||||
break; |
||||
#endif |
||||
#endif /* PNG_ARM_NEON_CHECK_SUPPORTED */ |
||||
|
||||
#ifdef PNG_ARM_NEON_API_SUPPORTED |
||||
default: /* OFF or INVALID */ |
||||
return; |
||||
|
||||
case PNG_OPTION_ON: |
||||
/* Option turned on */ |
||||
break; |
||||
} |
||||
#endif |
||||
|
||||
/* IMPORTANT: any new external functions used here must be declared using
|
||||
* PNG_INTERNAL_FUNCTION in ../pngpriv.h. This is required so that the |
||||
* 'prefix' option to configure works: |
||||
* |
||||
* ./configure --with-libpng-prefix=foobar_ |
||||
* |
||||
* Verify you have got this right by running the above command, doing a build |
||||
* and examining pngprefix.h; it must contain a #define for every external |
||||
* function you add. (Notice that this happens automatically for the |
||||
* initialization function.) |
||||
*/ |
||||
pp->read_filter[PNG_FILTER_VALUE_UP-1] = png_read_filter_row_up_neon; |
||||
|
||||
if (bpp == 3) |
||||
{ |
||||
pp->read_filter[PNG_FILTER_VALUE_SUB-1] = png_read_filter_row_sub3_neon; |
||||
pp->read_filter[PNG_FILTER_VALUE_AVG-1] = png_read_filter_row_avg3_neon; |
||||
pp->read_filter[PNG_FILTER_VALUE_PAETH-1] = |
||||
png_read_filter_row_paeth3_neon; |
||||
} |
||||
|
||||
else if (bpp == 4) |
||||
{ |
||||
pp->read_filter[PNG_FILTER_VALUE_SUB-1] = png_read_filter_row_sub4_neon; |
||||
pp->read_filter[PNG_FILTER_VALUE_AVG-1] = png_read_filter_row_avg4_neon; |
||||
pp->read_filter[PNG_FILTER_VALUE_PAETH-1] = |
||||
png_read_filter_row_paeth4_neon; |
||||
} |
||||
} |
||||
#endif /* PNG_ARM_NEON_OPT > 0 */ |
||||
#endif /* READ */ |
@ -0,0 +1,373 @@ |
||||
|
||||
/* filter_neon_intrinsics.c - NEON optimised filter functions
|
||||
* |
||||
* Copyright (c) 2014 Glenn Randers-Pehrson |
||||
* Written by James Yu <james.yu at linaro.org>, October 2013. |
||||
* Based on filter_neon.S, written by Mans Rullgard, 2011. |
||||
* |
||||
* Last changed in libpng 1.6.16 [December 22, 2014] |
||||
* |
||||
* This code is released under the libpng license. |
||||
* For conditions of distribution and use, see the disclaimer |
||||
* and license in png.h |
||||
*/ |
||||
|
||||
#include "../pngpriv.h" |
||||
|
||||
#ifdef PNG_READ_SUPPORTED |
||||
|
||||
/* This code requires -mfpu=neon on the command line: */ |
||||
#if PNG_ARM_NEON_IMPLEMENTATION == 1 /* intrinsics code from pngpriv.h */ |
||||
|
||||
#include <arm_neon.h> |
||||
|
||||
/* libpng row pointers are not necessarily aligned to any particular boundary,
|
||||
* however this code will only work with appropriate alignment. arm/arm_init.c |
||||
* checks for this (and will not compile unless it is done). This code uses |
||||
* variants of png_aligncast to avoid compiler warnings. |
||||
*/ |
||||
#define png_ptr(type,pointer) png_aligncast(type *,pointer) |
||||
#define png_ptrc(type,pointer) png_aligncastconst(const type *,pointer) |
||||
|
||||
/* The following relies on a variable 'temp_pointer' being declared with type
|
||||
* 'type'. This is written this way just to hide the GCC strict aliasing |
||||
* warning; note that the code is safe because there never is an alias between |
||||
* the input and output pointers. |
||||
*/ |
||||
#define png_ldr(type,pointer)\ |
||||
(temp_pointer = png_ptr(type,pointer), *temp_pointer) |
||||
|
||||
#if PNG_ARM_NEON_OPT > 0 |
||||
|
||||
void |
||||
png_read_filter_row_up_neon(png_row_infop row_info, png_bytep row, |
||||
png_const_bytep prev_row) |
||||
{ |
||||
png_bytep rp = row; |
||||
png_bytep rp_stop = row + row_info->rowbytes; |
||||
png_const_bytep pp = prev_row; |
||||
|
||||
for (; rp < rp_stop; rp += 16, pp += 16) |
||||
{ |
||||
uint8x16_t qrp, qpp; |
||||
|
||||
qrp = vld1q_u8(rp); |
||||
qpp = vld1q_u8(pp); |
||||
qrp = vaddq_u8(qrp, qpp); |
||||
vst1q_u8(rp, qrp); |
||||
} |
||||
} |
||||
|
||||
void |
||||
png_read_filter_row_sub3_neon(png_row_infop row_info, png_bytep row, |
||||
png_const_bytep prev_row) |
||||
{ |
||||
png_bytep rp = row; |
||||
png_bytep rp_stop = row + row_info->rowbytes; |
||||
|
||||
uint8x16_t vtmp = vld1q_u8(rp); |
||||
uint8x8x2_t *vrpt = png_ptr(uint8x8x2_t, &vtmp); |
||||
uint8x8x2_t vrp = *vrpt; |
||||
|
||||
uint8x8x4_t vdest; |
||||
vdest.val[3] = vdup_n_u8(0); |
||||
|
||||
for (; rp < rp_stop;) |
||||
{ |
||||
uint8x8_t vtmp1, vtmp2; |
||||
uint32x2_t *temp_pointer; |
||||
|
||||
vtmp1 = vext_u8(vrp.val[0], vrp.val[1], 3); |
||||
vdest.val[0] = vadd_u8(vdest.val[3], vrp.val[0]); |
||||
vtmp2 = vext_u8(vrp.val[0], vrp.val[1], 6); |
||||
vdest.val[1] = vadd_u8(vdest.val[0], vtmp1); |
||||
|
||||
vtmp1 = vext_u8(vrp.val[1], vrp.val[1], 1); |
||||
vdest.val[2] = vadd_u8(vdest.val[1], vtmp2); |
||||
vdest.val[3] = vadd_u8(vdest.val[2], vtmp1); |
||||
|
||||
vtmp = vld1q_u8(rp + 12); |
||||
vrpt = png_ptr(uint8x8x2_t, &vtmp); |
||||
vrp = *vrpt; |
||||
|
||||
vst1_lane_u32(png_ptr(uint32_t,rp), png_ldr(uint32x2_t,&vdest.val[0]), 0); |
||||
rp += 3; |
||||
vst1_lane_u32(png_ptr(uint32_t,rp), png_ldr(uint32x2_t,&vdest.val[1]), 0); |
||||
rp += 3; |
||||
vst1_lane_u32(png_ptr(uint32_t,rp), png_ldr(uint32x2_t,&vdest.val[2]), 0); |
||||
rp += 3; |
||||
vst1_lane_u32(png_ptr(uint32_t,rp), png_ldr(uint32x2_t,&vdest.val[3]), 0); |
||||
rp += 3; |
||||
} |
||||
|
||||
PNG_UNUSED(prev_row) |
||||
} |
||||
|
||||
void |
||||
png_read_filter_row_sub4_neon(png_row_infop row_info, png_bytep row, |
||||
png_const_bytep prev_row) |
||||
{ |
||||
png_bytep rp = row; |
||||
png_bytep rp_stop = row + row_info->rowbytes; |
||||
|
||||
uint8x8x4_t vdest; |
||||
vdest.val[3] = vdup_n_u8(0); |
||||
|
||||
for (; rp < rp_stop; rp += 16) |
||||
{ |
||||
uint32x2x4_t vtmp = vld4_u32(png_ptr(uint32_t,rp)); |
||||
uint8x8x4_t *vrpt = png_ptr(uint8x8x4_t,&vtmp); |
||||
uint8x8x4_t vrp = *vrpt; |
||||
uint32x2x4_t *temp_pointer; |
||||
|
||||
vdest.val[0] = vadd_u8(vdest.val[3], vrp.val[0]); |
||||
vdest.val[1] = vadd_u8(vdest.val[0], vrp.val[1]); |
||||
vdest.val[2] = vadd_u8(vdest.val[1], vrp.val[2]); |
||||
vdest.val[3] = vadd_u8(vdest.val[2], vrp.val[3]); |
||||
vst4_lane_u32(png_ptr(uint32_t,rp), png_ldr(uint32x2x4_t,&vdest), 0); |
||||
} |
||||
|
||||
PNG_UNUSED(prev_row) |
||||
} |
||||
|
||||
void |
||||
png_read_filter_row_avg3_neon(png_row_infop row_info, png_bytep row, |
||||
png_const_bytep prev_row) |
||||
{ |
||||
png_bytep rp = row; |
||||
png_const_bytep pp = prev_row; |
||||
png_bytep rp_stop = row + row_info->rowbytes; |
||||
|
||||
uint8x16_t vtmp; |
||||
uint8x8x2_t *vrpt; |
||||
uint8x8x2_t vrp; |
||||
uint8x8x4_t vdest; |
||||
vdest.val[3] = vdup_n_u8(0); |
||||
|
||||
vtmp = vld1q_u8(rp); |
||||
vrpt = png_ptr(uint8x8x2_t,&vtmp); |
||||
vrp = *vrpt; |
||||
|
||||
for (; rp < rp_stop; pp += 12) |
||||
{ |
||||
uint8x8_t vtmp1, vtmp2, vtmp3; |
||||
|
||||
uint8x8x2_t *vppt; |
||||
uint8x8x2_t vpp; |
||||
|
||||
uint32x2_t *temp_pointer; |
||||
|
||||
vtmp = vld1q_u8(pp); |
||||
vppt = png_ptr(uint8x8x2_t,&vtmp); |
||||
vpp = *vppt; |
||||
|
||||
vtmp1 = vext_u8(vrp.val[0], vrp.val[1], 3); |
||||
vdest.val[0] = vhadd_u8(vdest.val[3], vpp.val[0]); |
||||
vdest.val[0] = vadd_u8(vdest.val[0], vrp.val[0]); |
||||
|
||||
vtmp2 = vext_u8(vpp.val[0], vpp.val[1], 3); |
||||
vtmp3 = vext_u8(vrp.val[0], vrp.val[1], 6); |
||||
vdest.val[1] = vhadd_u8(vdest.val[0], vtmp2); |
||||
vdest.val[1] = vadd_u8(vdest.val[1], vtmp1); |
||||
|
||||
vtmp2 = vext_u8(vpp.val[0], vpp.val[1], 6); |
||||
vtmp1 = vext_u8(vrp.val[1], vrp.val[1], 1); |
||||
|
||||
vtmp = vld1q_u8(rp + 12); |
||||
vrpt = png_ptr(uint8x8x2_t,&vtmp); |
||||
vrp = *vrpt; |
||||
|
||||
vdest.val[2] = vhadd_u8(vdest.val[1], vtmp2); |
||||
vdest.val[2] = vadd_u8(vdest.val[2], vtmp3); |
||||
|
||||
vtmp2 = vext_u8(vpp.val[1], vpp.val[1], 1); |
||||
|
||||
vdest.val[3] = vhadd_u8(vdest.val[2], vtmp2); |
||||
vdest.val[3] = vadd_u8(vdest.val[3], vtmp1); |
||||
|
||||
vst1_lane_u32(png_ptr(uint32_t,rp), png_ldr(uint32x2_t,&vdest.val[0]), 0); |
||||
rp += 3; |
||||
vst1_lane_u32(png_ptr(uint32_t,rp), png_ldr(uint32x2_t,&vdest.val[1]), 0); |
||||
rp += 3; |
||||
vst1_lane_u32(png_ptr(uint32_t,rp), png_ldr(uint32x2_t,&vdest.val[2]), 0); |
||||
rp += 3; |
||||
vst1_lane_u32(png_ptr(uint32_t,rp), png_ldr(uint32x2_t,&vdest.val[3]), 0); |
||||
rp += 3; |
||||
} |
||||
} |
||||
|
||||
void |
||||
png_read_filter_row_avg4_neon(png_row_infop row_info, png_bytep row, |
||||
png_const_bytep prev_row) |
||||
{ |
||||
png_bytep rp = row; |
||||
png_bytep rp_stop = row + row_info->rowbytes; |
||||
png_const_bytep pp = prev_row; |
||||
|
||||
uint8x8x4_t vdest; |
||||
vdest.val[3] = vdup_n_u8(0); |
||||
|
||||
for (; rp < rp_stop; rp += 16, pp += 16) |
||||
{ |
||||
uint32x2x4_t vtmp; |
||||
uint8x8x4_t *vrpt, *vppt; |
||||
uint8x8x4_t vrp, vpp; |
||||
uint32x2x4_t *temp_pointer; |
||||
|
||||
vtmp = vld4_u32(png_ptr(uint32_t,rp)); |
||||
vrpt = png_ptr(uint8x8x4_t,&vtmp); |
||||
vrp = *vrpt; |
||||
vtmp = vld4_u32(png_ptrc(uint32_t,pp)); |
||||
vppt = png_ptr(uint8x8x4_t,&vtmp); |
||||
vpp = *vppt; |
||||
|
||||
vdest.val[0] = vhadd_u8(vdest.val[3], vpp.val[0]); |
||||
vdest.val[0] = vadd_u8(vdest.val[0], vrp.val[0]); |
||||
vdest.val[1] = vhadd_u8(vdest.val[0], vpp.val[1]); |
||||
vdest.val[1] = vadd_u8(vdest.val[1], vrp.val[1]); |
||||
vdest.val[2] = vhadd_u8(vdest.val[1], vpp.val[2]); |
||||
vdest.val[2] = vadd_u8(vdest.val[2], vrp.val[2]); |
||||
vdest.val[3] = vhadd_u8(vdest.val[2], vpp.val[3]); |
||||
vdest.val[3] = vadd_u8(vdest.val[3], vrp.val[3]); |
||||
|
||||
vst4_lane_u32(png_ptr(uint32_t,rp), png_ldr(uint32x2x4_t,&vdest), 0); |
||||
} |
||||
} |
||||
|
||||
static uint8x8_t |
||||
paeth(uint8x8_t a, uint8x8_t b, uint8x8_t c) |
||||
{ |
||||
uint8x8_t d, e; |
||||
uint16x8_t p1, pa, pb, pc; |
||||
|
||||
p1 = vaddl_u8(a, b); /* a + b */ |
||||
pc = vaddl_u8(c, c); /* c * 2 */ |
||||
pa = vabdl_u8(b, c); /* pa */ |
||||
pb = vabdl_u8(a, c); /* pb */ |
||||
pc = vabdq_u16(p1, pc); /* pc */ |
||||
|
||||
p1 = vcleq_u16(pa, pb); /* pa <= pb */ |
||||
pa = vcleq_u16(pa, pc); /* pa <= pc */ |
||||
pb = vcleq_u16(pb, pc); /* pb <= pc */ |
||||
|
||||
p1 = vandq_u16(p1, pa); /* pa <= pb && pa <= pc */ |
||||
|
||||
d = vmovn_u16(pb); |
||||
e = vmovn_u16(p1); |
||||
|
||||
d = vbsl_u8(d, b, c); |
||||
e = vbsl_u8(e, a, d); |
||||
|
||||
return e; |
||||
} |
||||
|
||||
void |
||||
png_read_filter_row_paeth3_neon(png_row_infop row_info, png_bytep row, |
||||
png_const_bytep prev_row) |
||||
{ |
||||
png_bytep rp = row; |
||||
png_const_bytep pp = prev_row; |
||||
png_bytep rp_stop = row + row_info->rowbytes; |
||||
|
||||
uint8x16_t vtmp; |
||||
uint8x8x2_t *vrpt; |
||||
uint8x8x2_t vrp; |
||||
uint8x8_t vlast = vdup_n_u8(0); |
||||
uint8x8x4_t vdest; |
||||
vdest.val[3] = vdup_n_u8(0); |
||||
|
||||
vtmp = vld1q_u8(rp); |
||||
vrpt = png_ptr(uint8x8x2_t,&vtmp); |
||||
vrp = *vrpt; |
||||
|
||||
for (; rp < rp_stop; pp += 12) |
||||
{ |
||||
uint8x8x2_t *vppt; |
||||
uint8x8x2_t vpp; |
||||
uint8x8_t vtmp1, vtmp2, vtmp3; |
||||
uint32x2_t *temp_pointer; |
||||
|
||||
vtmp = vld1q_u8(pp); |
||||
vppt = png_ptr(uint8x8x2_t,&vtmp); |
||||
vpp = *vppt; |
||||
|
||||
vdest.val[0] = paeth(vdest.val[3], vpp.val[0], vlast); |
||||
vdest.val[0] = vadd_u8(vdest.val[0], vrp.val[0]); |
||||
|
||||
vtmp1 = vext_u8(vrp.val[0], vrp.val[1], 3); |
||||
vtmp2 = vext_u8(vpp.val[0], vpp.val[1], 3); |
||||
vdest.val[1] = paeth(vdest.val[0], vtmp2, vpp.val[0]); |
||||
vdest.val[1] = vadd_u8(vdest.val[1], vtmp1); |
||||
|
||||
vtmp1 = vext_u8(vrp.val[0], vrp.val[1], 6); |
||||
vtmp3 = vext_u8(vpp.val[0], vpp.val[1], 6); |
||||
vdest.val[2] = paeth(vdest.val[1], vtmp3, vtmp2); |
||||
vdest.val[2] = vadd_u8(vdest.val[2], vtmp1); |
||||
|
||||
vtmp1 = vext_u8(vrp.val[1], vrp.val[1], 1); |
||||
vtmp2 = vext_u8(vpp.val[1], vpp.val[1], 1); |
||||
|
||||
vtmp = vld1q_u8(rp + 12); |
||||
vrpt = png_ptr(uint8x8x2_t,&vtmp); |
||||
vrp = *vrpt; |
||||
|
||||
vdest.val[3] = paeth(vdest.val[2], vtmp2, vtmp3); |
||||
vdest.val[3] = vadd_u8(vdest.val[3], vtmp1); |
||||
|
||||
vlast = vtmp2; |
||||
|
||||
vst1_lane_u32(png_ptr(uint32_t,rp), png_ldr(uint32x2_t,&vdest.val[0]), 0); |
||||
rp += 3; |
||||
vst1_lane_u32(png_ptr(uint32_t,rp), png_ldr(uint32x2_t,&vdest.val[1]), 0); |
||||
rp += 3; |
||||
vst1_lane_u32(png_ptr(uint32_t,rp), png_ldr(uint32x2_t,&vdest.val[2]), 0); |
||||
rp += 3; |
||||
vst1_lane_u32(png_ptr(uint32_t,rp), png_ldr(uint32x2_t,&vdest.val[3]), 0); |
||||
rp += 3; |
||||
} |
||||
} |
||||
|
||||
void |
||||
png_read_filter_row_paeth4_neon(png_row_infop row_info, png_bytep row, |
||||
png_const_bytep prev_row) |
||||
{ |
||||
png_bytep rp = row; |
||||
png_bytep rp_stop = row + row_info->rowbytes; |
||||
png_const_bytep pp = prev_row; |
||||
|
||||
uint8x8_t vlast = vdup_n_u8(0); |
||||
uint8x8x4_t vdest; |
||||
vdest.val[3] = vdup_n_u8(0); |
||||
|
||||
for (; rp < rp_stop; rp += 16, pp += 16) |
||||
{ |
||||
uint32x2x4_t vtmp; |
||||
uint8x8x4_t *vrpt, *vppt; |
||||
uint8x8x4_t vrp, vpp; |
||||
uint32x2x4_t *temp_pointer; |
||||
|
||||
vtmp = vld4_u32(png_ptr(uint32_t,rp)); |
||||
vrpt = png_ptr(uint8x8x4_t,&vtmp); |
||||
vrp = *vrpt; |
||||
vtmp = vld4_u32(png_ptrc(uint32_t,pp)); |
||||
vppt = png_ptr(uint8x8x4_t,&vtmp); |
||||
vpp = *vppt; |
||||
|
||||
vdest.val[0] = paeth(vdest.val[3], vpp.val[0], vlast); |
||||
vdest.val[0] = vadd_u8(vdest.val[0], vrp.val[0]); |
||||
vdest.val[1] = paeth(vdest.val[0], vpp.val[1], vpp.val[0]); |
||||
vdest.val[1] = vadd_u8(vdest.val[1], vrp.val[1]); |
||||
vdest.val[2] = paeth(vdest.val[1], vpp.val[2], vpp.val[1]); |
||||
vdest.val[2] = vadd_u8(vdest.val[2], vrp.val[2]); |
||||
vdest.val[3] = paeth(vdest.val[2], vpp.val[3], vpp.val[2]); |
||||
vdest.val[3] = vadd_u8(vdest.val[3], vrp.val[3]); |
||||
|
||||
vlast = vpp.val[3]; |
||||
|
||||
vst4_lane_u32(png_ptr(uint32_t,rp), png_ldr(uint32x2x4_t,&vdest), 0); |
||||
} |
||||
} |
||||
|
||||
#endif /* PNG_ARM_NEON_OPT > 0 */ |
||||
#endif /* PNG_ARM_NEON_IMPLEMENTATION == 1 (intrinsics) */ |
||||
#endif /* READ */ |
File diff suppressed because it is too large
Load Diff
File diff suppressed because it is too large
Load Diff
File diff suppressed because it is too large
Load Diff
File diff suppressed because it is too large
Load Diff
File diff suppressed because it is too large
Load Diff
File diff suppressed because it is too large
Load Diff
File diff suppressed because it is too large
Load Diff
File diff suppressed because it is too large
Load Diff
File diff suppressed because it is too large
Load Diff
File diff suppressed because it is too large
Load Diff
File diff suppressed because it is too large
Load Diff
Loading…
Reference in new issue