Skip to content

Commit

Permalink
Update the support for target-specific code
Browse files Browse the repository at this point in the history
The change removes the need for build configuration of target specific
code such as SIMD enhancements.

The extensible framework is based on checks in the new file named
pngtarget.h and allows per target description of requirements and
capabilities using macros with the prefix PNG_TARGET_.  These macros are
interpreted by the core libpng code in a target independent way.

The PNG_TARGET_ prefixed macros describe the target specific
implementation and document how to include it.  The actual inclusion of
the code is done, at present, by a single target independent core file
named pngsimd.c.

This approach ensures that no special configuration in the build system
is required on any system.  It is not required on a system which is
compiling for the build host where we can reasonably assume that the
compiler is correctly set up.  Neither is it required for cross builds
so long as the toolchain is correctly set up.  In this case the
toolchain means the cross build system and any ancilliary programs it
requires.

More details of the implementation are included in the source files
named pngtarget.h and pngsimd.c.  These files document in C comments how
additional target specific code might be added.

The changes consist primarily of movement of target specific code from
the core libpng source files to the existing target specific
subdirectories such as the ones named intel and arm.  This isolates the
code into the corresponding subdirectory and eliminates the requirement
for changes across the libpng structure.

As part of this the ARM processor NEON instruction set specific palette
to RGB code has been generalised to allow implementation in other
architectures.  The resultant updated version of the original (ARM Ltd)
implementation has been moved to the subdirectory named arm.

In addition some selected changes have been included to allow testing
with higher warning levels than the typical compiler defaults.

Reviewed-by: Cosmin Truta <ctruta@gmail.com>
Signed-off-by: John Bowler <jbowler@acm.org>
Signed-off-by: Cosmin Truta <ctruta@gmail.com>
  • Loading branch information
jbowler authored and ctruta committed Oct 10, 2024
1 parent bea7fde commit ed68998
Show file tree
Hide file tree
Showing 31 changed files with 857 additions and 1,241 deletions.
49 changes: 1 addition & 48 deletions CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -128,49 +128,6 @@ else()
# libm is not available or not needed.
endif()

if(PNG_HARDWARE_OPTIMIZATIONS)

# Set definitions and sources for ARM.
set(libpng_arm_sources
arm/arm_init.c
arm/filter_neon_intrinsics.c
arm/palette_neon_intrinsics.c)

# Set definitions and sources for PowerPC.
set(libpng_powerpc_sources
powerpc/powerpc_init.c
powerpc/filter_vsx_intrinsics.c)

# Set definitions and sources for Intel.
set(libpng_intel_sources
intel/intel_init.c
intel/filter_sse2_intrinsics.c)

# Set definitions and sources for MIPS.
set(libpng_mips_sources
mips/mips_init.c
mips/filter_msa_intrinsics.c
mips/filter_mmi_inline_assembly.c)

# Set definitions and sources for LoongArch.
set(libpng_loongarch_sources
loongarch/loongarch_lsx_init.c
loongarch/filter_lsx_intrinsics.c)

else(PNG_HARDWARE_OPTIMIZATIONS)

# Disable opt for all arches
add_definitions(
-DPNG_ARM_NEON_OPT=0
-DPNG_POWERPC_VSX_OPT=0
-DPNG_INTEL_SSE_OPT=0
-DPNG_MIPS_MSA_OPT=0
-DPNG_MIPS_MMI_OPT=0
-DPNG_LOONGARCH_LSX_OPT=0
)

endif(PNG_HARDWARE_OPTIMIZATIONS)

option(ld-version-script "Enable linker version script" ON)
if(ld-version-script AND NOT ANDROID AND NOT APPLE)
# Check if LD supports linker scripts.
Expand Down Expand Up @@ -481,11 +438,7 @@ set(libpng_sources
pngwrite.c
pngwtran.c
pngwutil.c
${libpng_arm_sources}
${libpng_intel_sources}
${libpng_mips_sources}
${libpng_powerpc_sources}
${libpng_loongarch_sources}
pngsimd.c
)
set(pngtest_sources
pngtest.c
Expand Down
10 changes: 2 additions & 8 deletions Makefile.am
Original file line number Diff line number Diff line change
Expand Up @@ -104,14 +104,8 @@ lib_LTLIBRARIES=libpng@PNGLIB_MAJOR@@PNGLIB_MINOR@.la
libpng@PNGLIB_MAJOR@@PNGLIB_MINOR@_la_SOURCES = png.c pngerror.c\
pngget.c pngmem.c pngpread.c pngread.c pngrio.c pngrtran.c pngrutil.c\
pngset.c pngtrans.c pngwio.c pngwrite.c pngwtran.c pngwutil.c\
png.h pngconf.h pngdebug.h pnginfo.h pngpriv.h pngstruct.h pngusr.dfa\
arm/arm_init.c arm/filter_neon_intrinsics.c\
arm/palette_neon_intrinsics.c\
intel/intel_init.c intel/filter_sse2_intrinsics.c\
loongarch/loongarch_lsx_init.c loongarch/filter_lsx_intrinsics.c\
mips/mips_init.c mips/filter_msa_intrinsics.c\
mips/filter_mmi_inline_assembly.c\
powerpc/powerpc_init.c powerpc/filter_vsx_intrinsics.c
pngsimd.c\
png.h pngconf.h pngdebug.h pnginfo.h pngpriv.h pngstruct.h pngusr.dfa

nodist_libpng@PNGLIB_MAJOR@@PNGLIB_MINOR@_la_SOURCES = pnglibconf.h

Expand Down
294 changes: 194 additions & 100 deletions arm/arm_init.c
Original file line number Diff line number Diff line change
Expand Up @@ -9,113 +9,27 @@
* For conditions of distribution and use, see the disclaimer
* and license in png.h
*/
#define png_target_impl "arm-neon"

/* This module requires POSIX 1003.1 functions. */
#define _POSIX_SOURCE 1

#include "../pngpriv.h"

#ifdef PNG_READ_SUPPORTED

#if PNG_ARM_NEON_OPT > 0
#ifdef PNG_ARM_NEON_CHECK_SUPPORTED /* Do run-time checks */
/* WARNING: it is strongly recommended that you do not build libpng with
* run-time checks for CPU features if at all possible. In the case of the ARM
* NEON instructions there is no processor-specific way of detecting the
* presence of the required support, therefore run-time detection is extremely
* OS specific.
*
* You may set the macro PNG_ARM_NEON_FILE to the file name of file containing
* a fragment of C source code which defines the png_have_neon function. There
* are a number of implementations in contrib/arm-neon, but the only one that
* has partial support is contrib/arm-neon/linux.c - a generic Linux
* implementation which reads /proc/cpufino.
*/
#include <signal.h> /* for sig_atomic_t */

#ifndef PNG_ARM_NEON_FILE
# if defined(__aarch64__) || defined(_M_ARM64)
/* ARM Neon is expected to be unconditionally available on ARM64. */
# error "PNG_ARM_NEON_CHECK_SUPPORTED must not be defined on ARM64"
# elif defined(__ARM_NEON__) || defined(__ARM_NEON)
/* ARM Neon is expected to be available on the target CPU architecture. */
# error "PNG_ARM_NEON_CHECK_SUPPORTED must not be defined on this CPU arch"
# elif defined(__linux__)
# define PNG_ARM_NEON_FILE "contrib/arm-neon/linux.c"
# else
# error "No support for run-time ARM Neon checking; use compile-time options"
# endif
#endif

static int png_have_neon(png_structp png_ptr);
#ifdef PNG_ARM_NEON_FILE
# include PNG_ARM_NEON_FILE
#if defined(_MSC_VER) && !defined(__clang__) && defined(_M_ARM64)
# include <arm64_neon.h>
#else
# include <arm_neon.h>
#endif
#endif /* PNG_ARM_NEON_CHECK_SUPPORTED */

#ifndef PNG_ALIGNED_MEMORY_SUPPORTED
# error "ALIGNED_MEMORY is required; set: -DPNG_ALIGNED_MEMORY_SUPPORTED"
#endif
/* Obtain the definitions of the actual filter functions: */
#include "filter_neon_intrinsics.c"

void
static void
png_init_filter_functions_neon(png_structp pp, unsigned int bpp)
{
/* The switch statement is compiled in for ARM_NEON_API, the call to
* png_have_neon is compiled in for ARM_NEON_CHECK. If both are defined
* the check is only performed if the API has not set the NEON option on
* or off explicitly. In this case the check controls what happens.
*
* If the CHECK is not compiled in and the option is UNSET the behavior prior
* to 1.6.7 was to use the NEON code - this was a bug caused by having the
* wrong order of the 'ON' and 'default' cases. UNSET now defaults to OFF,
* as documented in png.h
*/
png_debug(1, "in png_init_filter_functions_neon");
#ifdef PNG_ARM_NEON_API_SUPPORTED
switch ((pp->options >> PNG_ARM_NEON) & 3)
{
case PNG_OPTION_UNSET:
/* Allow the run-time check to execute if it has been enabled -
* thus both API and CHECK can be turned on. If it isn't supported
* this case will fall through to the 'default' below, which just
* returns.
*/
#endif /* PNG_ARM_NEON_API_SUPPORTED */
#ifdef PNG_ARM_NEON_CHECK_SUPPORTED
{
static volatile sig_atomic_t no_neon = -1; /* not checked */

if (no_neon < 0)
no_neon = !png_have_neon(pp);

if (no_neon)
return;
}
#ifdef PNG_ARM_NEON_API_SUPPORTED
break;
#endif
#endif /* PNG_ARM_NEON_CHECK_SUPPORTED */

#ifdef PNG_ARM_NEON_API_SUPPORTED
default: /* OFF or INVALID */
return;

case PNG_OPTION_ON:
/* Option turned on */
break;
}
#endif

/* IMPORTANT: any new external functions used here must be declared using
* PNG_INTERNAL_FUNCTION in ../pngpriv.h. This is required so that the
* 'prefix' option to configure works:
*
* ./configure --with-libpng-prefix=foobar_
/* IMPORTANT: DO NOT DEFINE EXTERNAL FUNCTIONS HERE
*
* Verify you have got this right by running the above command, doing a build
* and examining pngprefix.h; it must contain a #define for every external
* function you add. (Notice that this happens automatically for the
* initialization function.)
* This is because external functions must be declared with
* PNG_INTERNAL_FUNCTION in pngpriv.h; without this the PNG_PREFIX option to
* the build will not work (it will not know about these symbols).
*/
pp->read_filter[PNG_FILTER_VALUE_UP-1] = png_read_filter_row_up_neon;

Expand All @@ -135,5 +49,185 @@ png_init_filter_functions_neon(png_structp pp, unsigned int bpp)
png_read_filter_row_paeth4_neon;
}
}
#endif /* PNG_ARM_NEON_OPT > 0 */
#endif /* READ */

#define png_target_init_filter_functions_impl png_init_filter_functions_neon

#ifdef PNG_TARGET_STORES_DATA
/* png_target_free_data_impl
* Must be defined if the implementation stores data in
* png_struct::target_data. Need not be defined otherwise.
*/
static void
png_target_free_data_arm(png_structrp pp)
{
png_voidp ptr = pp->target_data;
pp->target_data = NULL;
png_free(pp, ptr);
}
#define png_target_free_data_impl png_target_free_data_arm
#endif /* TARGET_STORES_DATA */

#ifdef PNG_TARGET_IMPLEMENTS_EXPAND_PALETTE
/* png_target_do_expand_palette_impl [flag: png_target_expand_palette]
* static function
* OPTIONAL
* Handles the transform. Need not be defined, only called if the
* state contains png_target_<transform>, may set this flag to zero, may
* return false to indicate that the transform was not done (so the
* C implementation must then execute).
*/
#include "palette_neon_intrinsics.c"

static int
png_target_do_expand_palette_neon(png_structrp png_ptr, png_row_infop row_info,
png_bytep row, png_const_colorp palette, png_const_bytep trans_alpha,
int num_trans)
{
/* NOTE: it is important that this is done. row_info->width is not a CSE
* because the pointer is not declared with the 'restrict' parameter, this
* makes it a CSE but then it is very important that no one changes it in
* this function, hence the const.
*/
const png_uint_32 row_width = row_info->width;

/* NOTE: this is pretty much the original code:
*
* 1) The original code only works when the original PNG has 8-bits per
* palette. This test was in pngrtran.c and is now here.
*
* 2) The original code starts at the end and works backward but then stops
* when it is within 16 bytes of the start. It then left the remainder to
* the original code in pngrtran.c That code is now here.
*
* 3) The original code takes pointers to the end of the input and the end of
* the output; this is the way png_do_expand_palette works becuase it
* has to copy down from the end (otherwise it would overwrite the input
* data before it read it). Note that the row buffer is aliased by
* these two pointers.
*
* A consequence of passing pointers is that the row pointers (input and
* output) are forced into memory (they can't be in registers). This
* could be fixed and some compilers may be able to handle this but
* no changes have been made to the original ARM code at this point.
*/
if (row_info->color_type == PNG_COLOR_TYPE_PALETTE &&
row_info->bit_depth == 8 /* <8 requires a bigger "riffled" palette */)
{
png_const_bytep sp = row + (row_width - 1); /* 8 bit palette index */
if (num_trans > 0)
{
/* This case needs a "riffled" palette. In this implementation the
* initialization is done here, on demand.
*/
if (png_ptr->target_data == NULL)
{
/* Initialize the accelerated palette expansion.
*
* The data is now allocated using png_malloc_warn so the code
* does not error out on OOM.
*/
png_ptr->target_data = png_malloc_warn(png_ptr, 256 * 4);

/* On allocation error it is essential to clear the flag or a
* massive number of warnings will be output.
*/
if (png_ptr->target_data != NULL)
png_riffle_palette_neon(png_ptr->target_data, palette,
trans_alpha, num_trans);
else
goto clear_flag;
}

/* This is the general convention in the core transform code; when
* expanding the number of bytes in the row copy down (necessary) and
* pass a pointer to the last byte, not the first.
*
* It does not have to be preserved here but maybe it is better this
* way despite the fact that the comments in the neon palette code
* obfuscate what is happening.
*/
png_bytep dp = row + (4/*RGBA*/*row_width - 1);

/* Cosmin Truta: "Sometimes row_info->bit_depth has been changed to 8.
* In these cases, the palette hasn't been riffled."
*
* John Bowler: Explanation: The code in png_do_palette_expand
* *invariably* changes the bit depth to 8. So low palette bit depth
* gets expanded to 8 and png_row_info is adjusted to reflect this (see
* png_do_palette_expand), however the "riffle" initialization code
* checked the original png_ptr bit depth, so it didn't know this would
* happen...
*
* This could be changed; the original bit depth is irrelevant to the
* initialization code.
*/
png_uint_32 i = png_target_do_expand_palette_rgba8_neon(
png_ptr->target_data, row_info->width, &sp, &dp);

if (i == 0) /* nothing was done */
return 0; /* Return here: interlaced images start out narrow */

/* Now 'i' make not have reached row_width.
* NOTE: [i] is not the index into the row buffer, rather than is
* [row_width-i], this is the way it is done in the original
* png_do_expand_palette.
*/
for (; i < row_width; i++)
{
if ((int)(*sp) >= num_trans)
*dp-- = 0xff;
else
*dp-- = trans_alpha[*sp];
*dp-- = palette[*sp].blue;
*dp-- = palette[*sp].green;
*dp-- = palette[*sp].red;
sp--;
}

/* Finally update row_info to reflect the expanded output: */
row_info->bit_depth = 8;
row_info->pixel_depth = 32;
row_info->rowbytes = row_width * 4;
row_info->color_type = 6;
row_info->channels = 4;
return 1;
}
else
{
/* No tRNS chunk (num_trans == 0), expand to RGB not RGBA. */
png_bytep dp = row + (3/*RGB*/*row_width - 1);

png_uint_32 i = png_target_do_expand_palette_rgb8_neon(palette,
row_info->width, &sp, &dp);

if (i == 0)
return 0; /* Return here: interlaced images start out narrow */

/* Finish the last bytes: */
for (; i < row_width; i++)
{
*dp-- = palette[*sp].blue;
*dp-- = palette[*sp].green;
*dp-- = palette[*sp].red;
sp--;
}

row_info->bit_depth = 8;
row_info->pixel_depth = 24;
row_info->rowbytes = row_width * 3;
row_info->color_type = 2;
row_info->channels = 3;
return 1;
}
}

clear_flag:
/* Here on malloc failure and on an inapplicable image. */
png_ptr->target_state &= ~png_target_expand_palette;
return 0;
}

#define png_target_do_expand_palette_impl png_target_do_expand_palette_neon
/* EXPAND_PALETTE */

#endif /*TODO*/
Loading

0 comments on commit ed68998

Please sign in to comment.