From c5dd7753d0475ffec0f192f3181fe67a1d761680 Mon Sep 17 00:00:00 2001 From: Jenkins Date: Fri, 26 Jul 2024 12:07:30 +0000 Subject: [PATCH] Compute Library v24.07 --- Android.bp | 10 + CMakeLists.txt | 2 +- README.md | 23 +- SConscript | 32 +- SConstruct | 2 +- .../runtime/CL/functions/CLLSTMLayer.h | 9 +- .../NEON/functions/NEConvolutionLayer.h | 2 +- .../runtime/NEON/functions/NELSTMLayer.h | 9 +- .../experimental/operators/CpuActivation.h | 78 ++ .../runtime/experimental/operators/CpuGemm.h | 134 +++ .../experimental/operators/CpuGemmConv2d.h | 151 +++ .../operators/CpuGemmDirectConv2d.h | 111 ++ .../experimental/operators/CpuTranspose.h | 76 ++ .../operators/CpuWinogradConv2d.h | 119 +++ docs/Doxyfile | 110 +- docs/user_guide/errata.dox | 24 + docs/user_guide/library.dox | 2 +- .../release_version_and_change_log.dox | 10 + filelist.json | 16 +- src/BUILD.bazel | 12 +- src/CMakeLists.txt | 10 + src/common/cpuinfo/CpuIsaInfo.h | 8 +- .../NEON/kernels/NEROIAlignLayerKernel.cpp | 4 +- .../depthwise_implementation_constraints.hpp | 2 +- .../arm_conv/depthwise/depthwise_planar.hpp | 2 +- .../depthwise/interleaves/a64_s8q_3x3_dot.cpp | 2 +- .../depthwise/interleaves/a64_u8q_3x3_dot.cpp | 2 +- .../depthwise/interleaves/sve_s8q_3x3_dot.cpp | 2 +- .../depthwise/interleaves/sve_u8q_3x3_dot.cpp | 2 +- .../generic_direct.cpp | 2 +- .../generic_indirect.cpp | 2 +- .../generic_direct.cpp | 2 +- .../generic_indirect.cpp | 2 +- .../generic_direct.cpp | 2 +- .../generic_indirect.cpp | 2 +- .../generic_direct.cpp | 2 +- .../generic_indirect.cpp | 2 +- .../generic_direct.cpp | 2 +- .../generic_indirect.cpp | 2 +- .../generic.cpp | 2 +- .../generic.cpp | 2 +- .../generic_direct.cpp | 2 +- .../generic_indirect.cpp | 2 +- .../generic_direct.cpp | 2 +- .../generic_indirect.cpp | 2 +- .../generic_direct.cpp | 2 +- .../generic_indirect.cpp | 2 +- .../generic_direct.cpp | 2 +- .../generic_indirect.cpp | 2 +- .../generic_direct.cpp | 2 +- .../generic_indirect.cpp | 2 +- .../generic.cpp | 
2 +- .../generic.cpp | 2 +- .../generic.cpp | 2 +- .../generic.cpp | 2 +- .../generic.cpp | 2 +- .../generic.cpp | 2 +- .../generic.cpp | 2 +- .../generic.cpp | 2 +- .../generic.cpp | 2 +- .../generic.cpp | 2 +- .../generic.cpp | 2 +- .../generic.cpp | 2 +- .../generic.cpp | 2 +- .../generic.cpp | 2 +- .../generic.cpp | 2 +- .../generic.cpp | 2 +- .../generic.cpp | 2 +- .../generic.cpp | 2 +- .../generic.cpp | 2 +- ...6_nhwc_3x3_s1_output2x2_mla_depthfirst.hpp | 2 +- .../generic_direct.cpp | 2 +- .../generic_indirect.cpp | 2 +- ...6_nhwc_3x3_s1_output3x3_mla_depthfirst.hpp | 2 +- .../generic_direct.cpp | 2 +- .../generic_indirect.cpp | 2 +- ...6_nhwc_3x3_s1_output4x4_mla_depthfirst.hpp | 2 +- .../generic_direct.cpp | 2 +- .../generic_indirect.cpp | 2 +- ...6_nhwc_3x3_s2_output2x2_mla_depthfirst.hpp | 2 +- .../generic_direct.cpp | 2 +- .../generic_indirect.cpp | 2 +- ...6_nhwc_5x5_s1_output2x2_mla_depthfirst.hpp | 2 +- .../generic_direct.cpp | 2 +- .../generic_indirect.cpp | 2 +- .../generic_direct.cpp | 2 +- .../generic_indirect.cpp | 2 +- .../generic_direct.cpp | 2 +- .../generic_indirect.cpp | 2 +- .../generic_direct.cpp | 2 +- .../generic_indirect.cpp | 2 +- .../generic_direct.cpp | 2 +- .../generic_indirect.cpp | 2 +- .../generic.cpp | 2 +- .../generic.cpp | 2 +- .../generic.cpp | 2 +- .../generic.cpp | 2 +- .../generic.cpp | 2 +- .../generic.cpp | 2 +- .../generic.cpp | 2 +- .../generic.cpp | 2 +- .../generic.cpp | 2 +- .../generic.cpp | 2 +- .../generic.cpp | 2 +- .../generic.cpp | 2 +- .../generic.cpp | 2 +- .../generic.cpp | 2 +- .../generic.cpp | 2 +- .../generic.cpp | 2 +- .../generic.cpp | 2 +- .../generic.cpp | 2 +- .../generic.cpp | 2 +- .../generic.cpp | 2 +- .../generic_direct.cpp | 2 +- .../generic_indirect.cpp | 2 +- .../generic_direct.cpp | 2 +- .../generic_indirect.cpp | 2 +- .../generic_direct.cpp | 2 +- .../generic_indirect.cpp | 2 +- .../generic_direct.cpp | 2 +- .../generic_indirect.cpp | 2 +- .../generic_direct.cpp | 2 +- 
.../generic_indirect.cpp | 2 +- .../generic_direct.cpp | 2 +- .../generic_indirect.cpp | 2 +- .../generic_direct.cpp | 2 +- .../generic_indirect.cpp | 2 +- .../generic_direct.cpp | 2 +- .../generic_indirect.cpp | 2 +- .../generic_direct.cpp | 2 +- .../generic_indirect.cpp | 2 +- .../generic_direct.cpp | 2 +- .../generic_indirect.cpp | 2 +- .../generic.cpp | 2 +- .../generic.cpp | 2 +- .../generic.cpp | 2 +- .../generic.cpp | 2 +- .../generic.cpp | 2 +- .../generic.cpp | 2 +- .../generic.cpp | 2 +- .../generic.cpp | 2 +- .../generic.cpp | 2 +- .../generic.cpp | 2 +- .../generic.cpp | 2 +- .../generic.cpp | 2 +- .../generic.cpp | 2 +- .../generic.cpp | 2 +- .../generic.cpp | 2 +- .../generic.cpp | 2 +- .../generic.cpp | 2 +- .../generic.cpp | 2 +- .../generic.cpp | 2 +- .../generic.cpp | 2 +- .../arm_conv/pooling/depthfirst_driver.hpp | 2 +- .../generic.cpp | 2 +- .../generic.cpp | 2 +- .../generic.cpp | 2 +- .../generic.cpp | 2 +- .../generic.cpp | 2 +- .../generic.cpp | 2 +- .../generic.cpp | 2 +- .../generic.cpp | 2 +- .../generic.cpp | 2 +- .../generic.cpp | 2 +- .../generic.cpp | 2 +- .../generic.cpp | 2 +- .../generic.cpp | 2 +- .../generic.cpp | 2 +- .../generic.cpp | 2 +- .../generic.cpp | 2 +- .../generic.cpp | 2 +- .../generic.cpp | 2 +- .../generic.cpp | 2 +- .../generic.cpp | 2 +- .../generic.cpp | 2 +- .../generic.cpp | 2 +- .../generic.cpp | 2 +- .../generic.cpp | 2 +- .../generic.cpp | 2 +- .../generic.cpp | 2 +- .../generic.cpp | 2 +- .../generic.cpp | 2 +- .../generic.cpp | 2 +- .../generic.cpp | 2 +- .../generic.cpp | 2 +- .../generic.cpp | 2 +- .../generic.cpp | 2 +- .../generic.cpp | 2 +- .../generic.cpp | 2 +- .../generic.cpp | 2 +- .../generic.cpp | 2 +- .../generic.cpp | 2 +- .../generic.cpp | 2 +- .../generic.cpp | 2 +- .../generic.cpp | 2 +- .../generic.cpp | 2 +- .../generic.cpp | 2 +- .../generic.cpp | 2 +- .../generic.cpp | 2 +- .../generic.cpp | 2 +- .../generic.cpp | 2 +- .../generic.cpp | 2 +- .../generic.cpp | 2 +- .../generic.cpp | 2 
+- .../generic.cpp | 2 +- .../generic.cpp | 2 +- .../generic.cpp | 2 +- .../generic.cpp | 2 +- .../arm_conv/pooling/pooling_depthfirst.hpp | 2 +- .../pooling/pooling_depthfirst_generic.hpp | 2 +- src/core/NEON/kernels/arm_gemm/barrier.hpp | 2 +- src/core/NEON/kernels/arm_gemm/gemm_bf16.cpp | 2 +- src/core/NEON/kernels/arm_gemm/gemm_fp16.cpp | 2 +- .../kernels/arm_gemm/gemm_implementation.hpp | 2 +- src/core/NEON/kernels/arm_gemm/gemm_int16.cpp | 2 +- .../NEON/kernels/arm_gemm/gemm_uint16.cpp | 2 +- .../a32_interleave6_block1_fp32_fp32.hpp | 2 +- .../a64_interleave4_block16_s8_s8.hpp | 2 +- .../a64_interleave8_block1_fp16_fp16.hpp | 2 +- .../a64_interleave8_block1_fp16_fp32.hpp | 2 +- .../a64_interleave8_block1_fp32_fp32.hpp | 2 +- .../a64_interleave8_block1_s16_s16.hpp | 2 +- .../a64_interleave8_block1_s8_s16.hpp | 2 +- .../a64_interleave8_block1_u8_u16.hpp | 2 +- .../a64_interleave8_block2_bf16_bf16.hpp | 2 +- .../a64_interleave8_block2_fp32_fp32.hpp | 2 +- .../a64_interleave8_block4_bf16_bf16.hpp | 2 +- .../a64_interleave8_block4_fp32_bf16.hpp | 2 +- .../a64_interleave8_block4_s8_s8.hpp | 2 +- .../a64_interleave8_block8_s8_s8.hpp | 2 +- .../sme2_interleave1VL_block2_fp32_bf16.hpp | 2 +- .../sme2_interleave2VL_block2_fp32_bf16.hpp | 2 +- .../sme2_interleave4VL_block2_fp32_bf16.hpp | 2 +- .../sme_interleave1VL_bf16_bf16.hpp | 2 +- .../sme_interleave1VL_block2_bf16_bf16.hpp | 2 +- .../sme_interleave1VL_block2_fp16_fp16.hpp | 12 +- .../sme_interleave1VL_block4_s8_s8.hpp | 2 +- ...sme_interleave1VL_block4_s8_s8_summing.hpp | 2 +- .../sme_interleave1VL_block4_u8_u8.hpp | 2 +- ...sme_interleave1VL_block4_u8_u8_summing.hpp | 2 +- .../sme_interleave1VL_fp16_fp16.hpp | 2 +- .../sme_interleave1VL_fp32_fp32.hpp | 2 +- .../sme_interleave2VL_bf16_bf16.hpp | 2 +- .../sme_interleave2VL_block2_bf16_bf16.hpp | 2 +- .../sme_interleave2VL_block2_fp16_fp16.hpp | 2 +- .../sme_interleave2VL_block4_s8_s8.hpp | 2 +- ...sme_interleave2VL_block4_s8_s8_summing.hpp | 2 +- 
.../sme_interleave2VL_block4_u8_u8.hpp | 2 +- ...sme_interleave2VL_block4_u8_u8_summing.hpp | 2 +- .../sme_interleave2VL_fp16_fp16.hpp | 2 +- .../sme_interleave2VL_fp32_fp32.hpp | 2 +- .../sme_interleave4VL_block2_bf16_bf16.hpp | 2 +- .../sme_interleave4VL_block2_fp16_fp16.hpp | 12 +- .../sme_interleave4VL_block4_s8_s8.hpp | 2 +- ...sme_interleave4VL_block4_s8_s8_summing.hpp | 2 +- .../sme_interleave4VL_block4_u8_u8.hpp | 2 +- ...sme_interleave4VL_block4_u8_u8_summing.hpp | 2 +- .../sme_interleave4VL_fp32_fp32.hpp | 2 +- .../arm_gemm/kernels/a32_sgemm_8x6.hpp | 2 +- .../arm_gemm/kernels/a32_sgemm_8x6/a53.cpp | 2 +- .../arm_gemm/kernels/a32_sgemm_8x6/a55r1.cpp | 2 +- .../kernels/a32_sgemm_8x6/generic.cpp | 2 +- .../a64_ffhybrid_bf16fp32_mmla_6x16.hpp | 2 +- .../generic.cpp | 2 +- .../kernels/a64_ffhybrid_fp16_mla_6x32.hpp | 2 +- .../a64_ffhybrid_fp16_mla_6x32/generic.cpp | 2 +- .../kernels/a64_ffhybrid_fp32_mla_6x16.hpp | 2 +- .../a64_ffhybrid_fp32_mla_6x16/generic.cpp | 2 +- .../generic.cpp | 2 +- .../a64_ffinterleaved_bf16fp32_dot_8x12.hpp | 2 +- .../generic.cpp | 2 +- .../generic.cpp | 2 +- .../a64_ffinterleaved_fp16_mla_8x24.hpp | 2 +- .../generic.cpp | 2 +- .../a64_ffinterleaved_fp32_mla_8x12.hpp | 2 +- .../generic.cpp | 2 +- .../arm_gemm/kernels/a64_gemm_s16_8x12.hpp | 2 +- .../arm_gemm/kernels/a64_gemm_s8_4x4.hpp | 2 +- .../kernels/a64_gemm_s8_4x4/generic.cpp | 2 +- .../arm_gemm/kernels/a64_gemm_s8_8x12.hpp | 2 +- .../kernels/a64_gemm_s8_8x12/a55r1.cpp | 2 +- .../kernels/a64_gemm_s8_8x12/generic.cpp | 2 +- .../arm_gemm/kernels/a64_gemm_s8_8x12/x1.cpp | 2 +- .../arm_gemm/kernels/a64_gemm_u16_8x12.hpp | 2 +- .../arm_gemm/kernels/a64_gemm_u8_4x4.hpp | 2 +- .../kernels/a64_gemm_u8_4x4/generic.cpp | 2 +- .../arm_gemm/kernels/a64_gemm_u8_8x12.hpp | 2 +- .../kernels/a64_gemm_u8_8x12/a55r1.cpp | 2 +- .../kernels/a64_gemm_u8_8x12/generic.cpp | 2 +- .../arm_gemm/kernels/a64_gemm_u8_8x12/x1.cpp | 2 +- .../arm_gemm/kernels/a64_hgemm_8x24/a55r1.cpp | 2 +- 
.../kernels/a64_hgemm_8x24/generic.cpp | 2 +- .../arm_gemm/kernels/a64_hgemm_8x24/x1.cpp | 2 +- .../kernels/a64_hybrid_bf16fp32_dot_6x16.hpp | 2 +- .../a64_hybrid_bf16fp32_dot_6x16/generic.cpp | 2 +- .../kernels/a64_hybrid_bf16fp32_mmla_6x16.hpp | 2 +- .../a64_hybrid_bf16fp32_mmla_6x16/generic.cpp | 2 +- .../kernels/a64_hybrid_fp16_mla_6x32.hpp | 2 +- .../kernels/a64_hybrid_fp16_mla_6x32/a55.cpp | 2 +- .../a64_hybrid_fp16_mla_6x32/generic.cpp | 2 +- .../kernels/a64_hybrid_fp32_mla_4x24/a55.cpp | 2 +- .../a64_hybrid_fp32_mla_4x24/generic.cpp | 2 +- .../kernels/a64_hybrid_fp32_mla_6x16/a55.cpp | 2 +- .../a64_hybrid_fp32_mla_6x16/generic.cpp | 2 +- .../kernels/a64_hybrid_fp32_mla_8x4.hpp | 2 +- .../kernels/a64_hybrid_fp32_mla_8x4/a55.cpp | 2 +- .../a64_hybrid_fp32_mla_8x4/generic.cpp | 2 +- .../a64_hybrid_fp32bf16fp32_mmla_4x24.hpp | 2 +- .../generic.cpp | 2 +- .../a64_hybrid_fp32bf16fp32_mmla_6x16.hpp | 2 +- .../generic.cpp | 2 +- .../kernels/a64_hybrid_s8qa_dot_4x16.hpp | 2 +- .../kernels/a64_hybrid_s8qa_dot_4x16/a55.cpp | 2 +- .../a64_hybrid_s8qa_dot_4x16/generic.cpp | 2 +- .../kernels/a64_hybrid_s8qa_mmla_4x16.hpp | 2 +- .../a64_hybrid_s8qa_mmla_4x16/generic.cpp | 2 +- .../kernels/a64_hybrid_s8qs_dot_6x16.hpp | 2 +- .../kernels/a64_hybrid_s8qs_dot_6x16/a55.cpp | 2 +- .../a64_hybrid_s8qs_dot_6x16/generic.cpp | 2 +- .../kernels/a64_hybrid_s8qs_mmla_6x16.hpp | 2 +- .../a64_hybrid_s8qs_mmla_6x16/generic.cpp | 2 +- .../kernels/a64_hybrid_s8s32_dot_6x16.hpp | 2 +- .../kernels/a64_hybrid_s8s32_dot_6x16/a55.cpp | 2 +- .../a64_hybrid_s8s32_dot_6x16/generic.cpp | 2 +- .../kernels/a64_hybrid_s8s32_mmla_6x16.hpp | 2 +- .../a64_hybrid_s8s32_mmla_6x16/generic.cpp | 2 +- .../kernels/a64_hybrid_u8qa_dot_4x16.hpp | 2 +- .../kernels/a64_hybrid_u8qa_dot_4x16/a55.cpp | 2 +- .../a64_hybrid_u8qa_dot_4x16/generic.cpp | 2 +- .../kernels/a64_hybrid_u8qa_mmla_4x16.hpp | 2 +- .../a64_hybrid_u8qa_mmla_4x16/generic.cpp | 2 +- .../kernels/a64_hybrid_u8u32_dot_6x16.hpp | 2 +- 
.../kernels/a64_hybrid_u8u32_dot_6x16/a55.cpp | 2 +- .../a64_hybrid_u8u32_dot_6x16/generic.cpp | 2 +- .../kernels/a64_hybrid_u8u32_mmla_6x16.hpp | 2 +- .../a64_hybrid_u8u32_mmla_6x16/generic.cpp | 2 +- .../a64_interleaved_bf16fp32_dot_8x12.hpp | 2 +- .../generic.cpp | 2 +- .../a64_interleaved_bf16fp32_mmla_8x12.hpp | 2 +- .../a510.cpp | 2 +- .../generic.cpp | 2 +- .../a64_interleaved_s8s32_mmla_8x12.hpp | 2 +- .../a64_interleaved_s8s32_mmla_8x12/a510.cpp | 2 +- .../generic.cpp | 2 +- .../a64_interleaved_u8u32_mmla_8x12.hpp | 2 +- .../a64_interleaved_u8u32_mmla_8x12/a510.cpp | 2 +- .../generic.cpp | 2 +- .../arm_gemm/kernels/a64_sgemm_8x12/a53.cpp | 2 +- .../arm_gemm/kernels/a64_sgemm_8x12/a55.cpp | 2 +- .../arm_gemm/kernels/a64_sgemm_8x12/a55r1.cpp | 2 +- .../kernels/a64_sgemm_8x12/generic.cpp | 2 +- .../arm_gemm/kernels/a64_sgemm_8x12/x1.cpp | 2 +- .../arm_gemm/kernels/a64_sgemm_8x6.hpp | 2 +- .../kernels/a64_sgemm_8x6/generic.cpp | 2 +- .../a64_sgemv_pretransposed/generic.cpp | 2 +- .../a64_smallK_hybrid_fp32_mla_6x4.hpp | 2 +- .../a64_smallK_hybrid_fp32_mla_8x4.hpp | 2 +- .../a64_smallK_hybrid_s8s32_dot_6x4.hpp | 2 +- .../a64_smallK_hybrid_s8s32_dot_8x4.hpp | 2 +- .../a64_smallK_hybrid_u8u32_dot_6x4.hpp | 2 +- .../a64_smallK_hybrid_u8u32_dot_8x4.hpp | 2 +- .../sme2_gemv_bf16fp32_dot_16VL/generic.cpp | 2 +- .../sme2_gemv_fp32_mla_16VL/generic.cpp | 2 +- .../generic.cpp | 2 +- .../sme2_gemv_s8qa_dot_16VL/generic.cpp | 2 +- .../sme2_gemv_u8qa_dot_16VL/generic.cpp | 2 +- ...erleaved_nomerge_bf16fp32_mopa_1VLx4VL.hpp | 2 +- .../generic.cpp | 2 +- ...erleaved_nomerge_bf16fp32_mopa_2VLx2VL.hpp | 2 +- .../generic.cpp | 2 +- ...erleaved_nomerge_bf16fp32_mopa_4VLx1VL.hpp | 2 +- .../generic.cpp | 2 +- ...aved_nomerge_fp16fp32fp16_mopa_1VLx4VL.hpp | 2 +- .../generic.cpp | 12 +- ...aved_nomerge_fp16fp32fp16_mopa_2VLx2VL.hpp | 2 +- .../generic.cpp | 12 +- ...aved_nomerge_fp16fp32fp16_mopa_4VLx1VL.hpp | 2 +- .../generic.cpp | 12 +- ..._interleaved_nomerge_fp32_mopa_1VLx4VL.hpp 
| 2 +- .../generic.cpp | 2 +- ..._interleaved_nomerge_fp32_mopa_2VLx2VL.hpp | 2 +- .../generic.cpp | 2 +- ..._interleaved_nomerge_fp32_mopa_4VLx1VL.hpp | 2 +- .../generic.cpp | 2 +- ...2_interleaved_nomerge_s8q_mopa_1VLx4VL.hpp | 2 +- .../generic.cpp | 2 +- ...2_interleaved_nomerge_s8q_mopa_2VLx2VL.hpp | 2 +- .../generic.cpp | 2 +- ...2_interleaved_nomerge_s8q_mopa_4VLx1VL.hpp | 2 +- .../generic.cpp | 2 +- ...interleaved_nomerge_s8s32_mopa_1VLx4VL.hpp | 2 +- .../generic.cpp | 2 +- ...interleaved_nomerge_s8s32_mopa_2VLx2VL.hpp | 2 +- .../generic.cpp | 2 +- ...interleaved_nomerge_s8s32_mopa_4VLx1VL.hpp | 2 +- .../generic.cpp | 2 +- ...2_interleaved_nomerge_u8q_mopa_1VLx4VL.hpp | 2 +- .../generic.cpp | 2 +- ...2_interleaved_nomerge_u8q_mopa_2VLx2VL.hpp | 2 +- .../generic.cpp | 2 +- ...2_interleaved_nomerge_u8q_mopa_4VLx1VL.hpp | 2 +- .../generic.cpp | 2 +- .../sve_ffhybrid_bf16fp32_mmla_6x4VL.hpp | 2 +- .../generic.cpp | 2 +- .../kernels/sve_ffhybrid_fp16_mla_6x4VL.hpp | 2 +- .../sve_ffhybrid_fp16_mla_6x4VL/a64fx.cpp | 2 +- .../sve_ffhybrid_fp16_mla_6x4VL/generic.cpp | 2 +- .../kernels/sve_ffhybrid_fp32_mla_6x4VL.hpp | 2 +- .../sve_ffhybrid_fp32_mla_6x4VL/a64fx.cpp | 2 +- .../sve_ffhybrid_fp32_mla_6x4VL/generic.cpp | 2 +- .../generic.cpp | 2 +- .../generic.cpp | 2 +- .../sve_ffinterleaved_fp16_mla_8x3VL.hpp | 2 +- .../a64fx.cpp | 2 +- .../generic.cpp | 2 +- .../sve_ffinterleaved_fp32_mla_8x3VL.hpp | 2 +- .../a64fx.cpp | 2 +- .../generic.cpp | 2 +- .../kernels/sve_hybrid_bf16fp32_dot_6x4VL.hpp | 2 +- .../sve_hybrid_bf16fp32_dot_6x4VL/generic.cpp | 2 +- .../sve_hybrid_bf16fp32_mmla_6x4VL.hpp | 2 +- .../generic.cpp | 2 +- .../kernels/sve_hybrid_fp16_mla_6x4VL.hpp | 2 +- .../sve_hybrid_fp16_mla_6x4VL/a64fx.cpp | 2 +- .../sve_hybrid_fp16_mla_6x4VL/generic.cpp | 2 +- .../kernels/sve_hybrid_fp32_mla_6x4VL.hpp | 2 +- .../sve_hybrid_fp32_mla_6x4VL/a64fx.cpp | 2 +- .../sve_hybrid_fp32_mla_6x4VL/generic.cpp | 2 +- .../kernels/sve_hybrid_fp32_mla_8x1VL.hpp | 2 +- 
.../sve_hybrid_fp32_mla_8x1VL/a64fx.cpp | 2 +- .../sve_hybrid_fp32_mla_8x1VL/generic.cpp | 2 +- .../sve_hybrid_fp32bf16fp32_mmla_4x6VL.hpp | 2 +- .../generic.cpp | 2 +- .../sve_hybrid_fp32bf16fp32_mmla_6x4VL.hpp | 2 +- .../generic.cpp | 2 +- .../kernels/sve_hybrid_s8qa_dot_4x4VL.hpp | 2 +- .../sve_hybrid_s8qa_dot_4x4VL/generic.cpp | 2 +- .../kernels/sve_hybrid_s8qa_mmla_4x4VL.hpp | 2 +- .../sve_hybrid_s8qa_mmla_4x4VL/generic.cpp | 2 +- .../kernels/sve_hybrid_s8qs_dot_6x4VL.hpp | 2 +- .../sve_hybrid_s8qs_dot_6x4VL/generic.cpp | 2 +- .../kernels/sve_hybrid_s8qs_mmla_6x4VL.hpp | 2 +- .../sve_hybrid_s8qs_mmla_6x4VL/generic.cpp | 2 +- .../kernels/sve_hybrid_s8s32_dot_6x4VL.hpp | 2 +- .../sve_hybrid_s8s32_dot_6x4VL/a64fx.cpp | 2 +- .../sve_hybrid_s8s32_dot_6x4VL/generic.cpp | 2 +- .../kernels/sve_hybrid_s8s32_mmla_6x4VL.hpp | 2 +- .../sve_hybrid_s8s32_mmla_6x4VL/generic.cpp | 2 +- .../kernels/sve_hybrid_u8qa_dot_4x4VL.hpp | 2 +- .../sve_hybrid_u8qa_dot_4x4VL/generic.cpp | 2 +- .../kernels/sve_hybrid_u8qa_mmla_4x4VL.hpp | 2 +- .../sve_hybrid_u8qa_mmla_4x4VL/generic.cpp | 2 +- .../kernels/sve_hybrid_u8u32_dot_6x4VL.hpp | 2 +- .../sve_hybrid_u8u32_dot_6x4VL/a64fx.cpp | 2 +- .../sve_hybrid_u8u32_dot_6x4VL/generic.cpp | 2 +- .../kernels/sve_hybrid_u8u32_mmla_6x4VL.hpp | 2 +- .../sve_hybrid_u8u32_mmla_6x4VL/generic.cpp | 2 +- .../sve_interleaved_bf16fp32_dot_8x3VL.hpp | 2 +- .../generic.cpp | 2 +- .../sve_interleaved_bf16fp32_mmla_8x3VL.hpp | 2 +- .../generic.cpp | 2 +- .../sve_interleaved_fp16_mla_8x3VL.hpp | 2 +- .../sve_interleaved_fp16_mla_8x3VL/a64fx.cpp | 2 +- .../generic.cpp | 2 +- .../sve_interleaved_fp32_mla_8x3VL.hpp | 2 +- .../sve_interleaved_fp32_mla_8x3VL/a64fx.cpp | 2 +- .../generic.cpp | 2 +- .../sve_interleaved_fp32_mmla_8x3VL.hpp | 2 +- .../sve_interleaved_s8s32_dot_8x3VL.hpp | 2 +- .../sve_interleaved_s8s32_dot_8x3VL/a64fx.cpp | 2 +- .../generic.cpp | 2 +- .../sve_interleaved_s8s32_mmla_8x3VL.hpp | 2 +- .../generic.cpp | 2 +- 
.../sve_interleaved_u8u32_dot_8x3VL.hpp | 2 +- .../sve_interleaved_u8u32_dot_8x3VL/a64fx.cpp | 2 +- .../generic.cpp | 2 +- .../sve_interleaved_u8u32_mmla_8x3VL.hpp | 2 +- .../generic.cpp | 2 +- .../arm_gemm/performance_parameters.hpp | 2 +- src/core/NEON/kernels/arm_gemm/quantized.cpp | 2 +- .../a32_transpose_interleave_8way_32bit.hpp | 2 +- .../a64_transpose_interleave_128.hpp | 2 +- .../a64_transpose_interleave_12_1x4.hpp | 2 +- .../a64_transpose_interleave_12_1x8.hpp | 2 +- .../a64_transpose_interleave_12_2x2.hpp | 2 +- .../a64_transpose_interleave_12_2x4.hpp | 2 +- ...4_transpose_interleave_12_2x4_fp32bf16.hpp | 2 +- .../a64_transpose_interleave_12_s8s16.hpp | 2 +- .../a64_transpose_interleave_12_u8u16.hpp | 2 +- .../a64_transpose_interleave_16.hpp | 12 +- .../a64_transpose_interleave_16_1x4.hpp | 2 +- .../a64_transpose_interleave_16_1x8.hpp | 2 +- .../a64_transpose_interleave_16_2x2.hpp | 2 +- .../a64_transpose_interleave_16_2x4.hpp | 2 +- ...4_transpose_interleave_16_2x4_fp32bf16.hpp | 2 +- .../a64_transpose_interleave_24.hpp | 2 +- ...4_transpose_interleave_24_2x4_fp32bf16.hpp | 2 +- .../a64_transpose_interleave_24_bf16fp32.hpp | 2 +- .../a64_transpose_interleave_24_fp16fp32.hpp | 2 +- .../a64_transpose_interleave_32_1x4.hpp | 2 +- .../a64_transpose_interleave_32_2x2.hpp | 2 +- .../a64_transpose_interleave_48.hpp | 2 +- .../a64_transpose_interleave_4_1x16.hpp | 2 +- .../a64_transpose_interleave_4_1x4.hpp | 2 +- .../a64_transpose_interleave_64.hpp | 2 +- .../a64_transpose_interleave_96.hpp | 2 +- .../sme_transpose_interleave_16VL.hpp | 2 +- .../sme_transpose_interleave_16VL_1x4.hpp | 2 +- ...transpose_interleave_16VL_2x2_fp32bf16.hpp | 2 +- .../sme_transpose_interleave_1VL.hpp | 2 +- .../sme_transpose_interleave_1VL_1x4.hpp | 2 +- .../sme_transpose_interleave_1VL_2x2.hpp | 2 +- ..._transpose_interleave_1VL_2x2_fp32bf16.hpp | 2 +- .../sme_transpose_interleave_2VL.hpp | 2 +- .../sme_transpose_interleave_2VL_1x4.hpp | 2 +- 
.../sme_transpose_interleave_2VL_2x2.hpp | 2 +- ..._transpose_interleave_2VL_2x2_fp32bf16.hpp | 2 +- .../sme_transpose_interleave_4VL.hpp | 2 +- .../sme_transpose_interleave_4VL_1x4.hpp | 2 +- .../sme_transpose_interleave_4VL_2x2.hpp | 2 +- ..._transpose_interleave_4VL_2x2_fp32bf16.hpp | 2 +- .../sme_transpose_interleave_8VL.hpp | 2 +- .../sme_transpose_interleave_8VL_1x4.hpp | 2 +- .../sme_transpose_interleave_8VL_2x2.hpp | 2 +- ...transpose_interleave_12VL_2x4_fp32bf16.hpp | 2 +- .../sve_transpose_interleave_1VL.hpp | 2 +- .../sve_transpose_interleave_1VL_1x4.hpp | 2 +- .../sve_transpose_interleave_3VL.hpp | 2 +- .../sve_transpose_interleave_3VL_1x4.hpp | 2 +- .../sve_transpose_interleave_3VL_2x2.hpp | 2 +- .../sve_transpose_interleave_4VL.hpp | 2 +- .../sve_transpose_interleave_4VL_1x4.hpp | 2 +- .../sve_transpose_interleave_4VL_2x2.hpp | 2 +- .../sve_transpose_interleave_6VL_1x8.hpp | 2 +- .../sve_transpose_interleave_6VL_2x4.hpp | 2 +- ..._transpose_interleave_6VL_2x4_fp32bf16.hpp | 2 +- .../sve_transpose_interleave_6VL_4x2.hpp | 2 +- .../sve_transpose_interleave_8VL.hpp | 2 +- .../sve_transpose_interleave_8VL_1x4.hpp | 2 +- .../sve_transpose_interleave_8VL_1x8.hpp | 2 +- .../sve_transpose_interleave_8VL_2x2.hpp | 2 +- .../sve_transpose_interleave_8VL_2x4.hpp | 2 +- ..._transpose_interleave_8VL_2x4_fp32bf16.hpp | 2 +- src/core/NEON/kernels/arm_gemm/utils.hpp | 2 +- src/core/NEON/kernels/assembly/depthwise.hpp | 7 +- .../NEON/kernels/assembly/pool_common.hpp | 7 +- src/core/NEON/kernels/assembly/pooling.hpp | 7 +- .../NEON/kernels/convolution/common/shims.hpp | 944 +++++++++++++++++- src/cpu/kernels/CpuDirectConv3dKernel.cpp | 20 +- .../CpuGemmLowpMatrixMultiplyKernel.cpp | 2 +- src/cpu/kernels/CpuPermuteKernel.cpp | 65 +- .../assembly/CpuGemmAssemblyWrapperKernel.h | 8 +- .../assembly/convolution_parameters.hpp | 8 +- .../list.h => generic/neon/float_impl.h} | 17 +- src/cpu/kernels/conv3d/generic/neon/fp16.cpp | 49 + 
src/cpu/kernels/conv3d/generic/neon/fp32.cpp | 46 + .../kernels/conv3d/generic/neon/qasymm8.cpp | 46 + .../conv3d/generic/neon/qasymm8_signed.cpp | 46 + .../neon/quantized_impl.h} | 12 +- src/cpu/kernels/conv3d/list.h | 47 + .../elementwise_binary/generic/sve/impl.cpp | 8 +- .../gemm_matrix_mul/generic/neon/impl.cpp | 572 +++++++---- .../meanstddevnorm/generic/neon/fp16.cpp | 13 +- src/cpu/operators/CpuConv2d.h | 8 +- src/cpu/operators/CpuGemm.cpp | 4 +- src/cpu/operators/CpuGemmConv2d.cpp | 10 +- src/cpu/operators/CpuGemmConv2d.h | 2 +- src/cpu/operators/CpuPermute.cpp | 76 +- src/cpu/operators/CpuWinogradConv2d.cpp | 4 +- .../internal/CpuGemmAssemblyDispatch.cpp | 4 +- src/gpu/cl/kernels/ClCropKernel.cpp | 3 +- src/runtime/CL/functions/CLLSTMLayer.cpp | 51 +- src/runtime/NEON/functions/NELSTMLayer.cpp | 50 +- .../experimental/operators/CpuActivation.cpp | 65 ++ .../experimental/operators/CpuGemm.cpp | 96 ++ .../experimental/operators/CpuGemmConv2d.cpp | 110 ++ .../operators/CpuGemmDirectConv2d.cpp | 84 ++ .../experimental/operators/CpuTranspose.cpp | 65 ++ .../operators/CpuWinogradConv2d.cpp | 90 ++ tests/BUILD.bazel | 3 +- tests/SConscript | 30 +- tests/datasets/ShapeDatasets.h | 22 +- tests/framework/Macros.h | 22 +- tests/framework/SConscript | 22 +- tests/validation/CL/CropResize.cpp | 4 +- tests/validation/CL/LSTMLayer.cpp | 207 ++-- .../CL/MeanStdDevNormalizationLayer.cpp | 8 +- tests/validation/CMakeLists.txt | 10 +- tests/validation/CPP/Permute.cpp | 20 +- tests/validation/NEON/ActivationLayer.cpp | 40 +- tests/validation/NEON/AddMulAdd.cpp | 33 +- tests/validation/NEON/ArgMinMax.cpp | 31 +- tests/validation/NEON/ArithmeticAddition.cpp | 18 +- .../validation/NEON/ArithmeticSubtraction.cpp | 18 +- .../validation/NEON/BatchConcatenateLayer.cpp | 30 +- .../NEON/BatchNormalizationLayer.cpp | 34 +- .../validation/NEON/BoundingBoxTransform.cpp | 22 +- tests/validation/NEON/Cast.cpp | 32 +- tests/validation/NEON/ChannelShuffle.cpp | 30 +- 
tests/validation/NEON/Comparisons.cpp | 30 +- .../NEON/ConvertFullyConnectedWeights.cpp | 30 +- tests/validation/NEON/Convolution3D.cpp | 22 +- tests/validation/NEON/ConvolutionLayer.cpp | 110 +- tests/validation/NEON/CropResize.cpp | 18 +- tests/validation/NEON/DeconvolutionLayer.cpp | 70 +- .../validation/NEON/DepthConcatenateLayer.cpp | 30 +- tests/validation/NEON/DepthConvertLayer.cpp | 206 +++- .../NEON/DepthwiseConvolutionLayer.cpp | 128 ++- tests/validation/NEON/DequantizationLayer.cpp | 36 +- .../NEON/DilatedConvolutionLayer.cpp | 36 +- .../NEON/DirectConvolutionLayer.cpp | 26 +- .../NEON/ElementwiseAbsoluteValue.cpp | 34 +- tests/validation/NEON/ElementwiseDivision.cpp | 20 +- tests/validation/NEON/ElementwiseExpLayer.cpp | 34 +- tests/validation/NEON/ElementwiseLog.cpp | 34 +- tests/validation/NEON/ElementwiseMax.cpp | 22 +- tests/validation/NEON/ElementwiseMin.cpp | 22 +- tests/validation/NEON/ElementwiseNegation.cpp | 34 +- tests/validation/NEON/ElementwisePower.cpp | 22 +- tests/validation/NEON/ElementwiseRound.cpp | 30 +- .../validation/NEON/ElementwiseRsqrtLayer.cpp | 34 +- tests/validation/NEON/ElementwiseSin.cpp | 34 +- .../validation/NEON/ElementwiseSquareDiff.cpp | 26 +- tests/validation/NEON/Flatten.cpp | 30 +- tests/validation/NEON/Floor.cpp | 30 +- tests/validation/NEON/FullyConnectedLayer.cpp | 48 +- .../NEON/FuseBatchNormalization.cpp | 70 +- tests/validation/NEON/GEMM.cpp | 49 +- .../NEON/GenerateProposalsLayer.cpp | 18 +- tests/validation/NEON/Im2Col.cpp | 30 +- .../NEON/InstanceNormalizationLayer.cpp | 22 +- tests/validation/NEON/L2NormalizeLayer.cpp | 34 +- tests/validation/NEON/LSTMLayer.cpp | 227 +++-- tests/validation/NEON/LSTMLayerQuantized.cpp | 156 ++- tests/validation/NEON/LogSoftmaxLayer.cpp | 46 +- tests/validation/NEON/MatMul.cpp | 40 +- tests/validation/NEON/MaxUnpoolingLayer.cpp | 18 +- .../NEON/MeanStdDevNormalizationLayer.cpp | 40 +- tests/validation/NEON/NormalizationLayer.cpp | 22 +- tests/validation/NEON/PReluLayer.cpp | 
34 +- tests/validation/NEON/PadLayer.cpp | 30 +- tests/validation/NEON/Permute.cpp | 19 +- .../NEON/PixelWiseMultiplication.cpp | 18 +- tests/validation/NEON/Pooling3dLayer.cpp | 70 +- tests/validation/NEON/PoolingLayer.cpp | 58 +- tests/validation/NEON/QuantizationLayer.cpp | 64 +- tests/validation/NEON/RNNLayer.cpp | 20 +- tests/validation/NEON/ROIAlignLayer.cpp | 22 +- tests/validation/NEON/Range.cpp | 18 +- tests/validation/NEON/ReduceMean.cpp | 34 +- tests/validation/NEON/ReductionOperation.cpp | 34 +- tests/validation/NEON/Reverse.cpp | 30 +- tests/validation/NEON/Scale.cpp | 104 +- tests/validation/NEON/Select.cpp | 28 +- tests/validation/NEON/Slice.cpp | 28 +- tests/validation/NEON/SoftmaxLayer.cpp | 55 +- tests/validation/NEON/Split.cpp | 34 +- tests/validation/NEON/StridedSlice.cpp | 28 +- tests/validation/NEON/Unstack.cpp | 21 +- .../fixtures/ActivationLayerFixture.h | 6 + tests/validation/fixtures/AddMulAddFixture.h | 19 +- tests/validation/fixtures/ArgMinMaxFixture.h | 14 +- .../fixtures/ArithmeticOperationsFixture.h | 14 +- .../fixtures/BatchNormalizationLayerFixture.h | 15 +- .../fixtures/BoundingBoxTransformFixture.h | 14 +- tests/validation/fixtures/CastFixture.h | 14 +- .../fixtures/ChannelShuffleLayerFixture.h | 14 +- tests/validation/fixtures/ComparisonFixture.h | 14 +- .../fixtures/ComputeAllAnchorsFixture.h | 14 +- .../fixtures/ConcatenateLayerFixture.h | 14 +- .../ConvertFullyConnectedWeightsFixture.h | 14 +- .../fixtures/ConvolutionLayerFixture.h | 25 +- .../fixtures/CpuActivationFixture.h | 200 ++++ .../fixtures/CpuGemmConv2dFixture.h | 165 +++ .../fixtures/CpuGemmDirectConv2dFixture.h | 166 +++ .../validation/fixtures/CpuTransposeFixture.h | 110 ++ .../fixtures/CpuWinogradConv2dFixture.h | 211 ++++ tests/validation/fixtures/CropResizeFixture.h | 14 +- .../fixtures/DeconvolutionLayerFixture.h | 38 +- .../fixtures/DepthConvertLayerFixture.h | 14 +- .../DepthwiseConvolutionLayerFixture.h | 10 + .../fixtures/DequantizationLayerFixture.h | 14 
+- .../fixtures/DirectConvolution3DFixture.h | 11 +- .../fixtures/DirectConvolutionLayerFixture.h | 14 +- .../fixtures/ElementwiseOperationsFixture.h | 9 +- .../fixtures/ElementwiseUnaryFixture.h | 14 +- .../validation/fixtures/FlattenLayerFixture.h | 14 +- tests/validation/fixtures/FloorFixture.h | 14 +- .../fixtures/FullyConnectedLayerFixture.h | 12 + .../fixtures/FuseBatchNormalizationFixture.h | 14 +- tests/validation/fixtures/GEMMFixture.h | 6 + tests/validation/fixtures/Im2ColFixture.h | 14 +- .../InstanceNormalizationLayerFixture.h | 14 +- .../fixtures/L2NormalizeLayerFixture.h | 14 +- tests/validation/fixtures/LSTMLayerFixture.h | 57 +- tests/validation/fixtures/MatMulFixture.h | 6 + .../fixtures/MaxUnpoolingLayerFixture.h | 14 +- .../MeanStdDevNormalizationLayerFixture.h | 14 +- .../fixtures/NormalizationLayerFixture.h | 14 +- tests/validation/fixtures/PadLayerFixture.h | 14 +- .../fixtures/PixelWiseMultiplicationFixture.h | 15 +- .../fixtures/Pooling3dLayerFixture.h | 14 +- .../validation/fixtures/PoolingLayerFixture.h | 14 +- .../fixtures/QuantizationLayerFixture.h | 14 +- tests/validation/fixtures/RNNLayerFixture.h | 14 +- .../fixtures/ROIAlignLayerFixture.h | 14 +- tests/validation/fixtures/RangeFixture.h | 14 +- tests/validation/fixtures/ReduceMeanFixture.h | 14 +- .../fixtures/ReductionOperationFixture.h | 14 +- tests/validation/fixtures/ReverseFixture.h | 8 +- tests/validation/fixtures/ScaleFixture.h | 8 +- tests/validation/fixtures/SelectFixture.h | 14 +- .../fixtures/SliceOperationsFixtures.h | 20 +- .../validation/fixtures/SoftmaxLayerFixture.h | 14 +- tests/validation/fixtures/SplitFixture.h | 20 +- tests/validation/fixtures/UnstackFixture.h | 14 +- .../WinogradConvolutionLayerFixture.h | 24 + .../gpu/cl/MatMulKernelFixture.h | 1 - tests/validation/reference/Conv3D.cpp | 24 +- tests/validation/reference/Conv3D.h | 10 +- .../MeanStdDevNormalizationLayer.cpp | 46 +- tests/validation/reference/Permute.cpp | 3 + 
.../experimental/operators/CpuActivation.cpp | 124 +++ .../experimental/operators/CpuGemm.cpp | 143 +++ .../experimental/operators/CpuGemmConv2d.cpp | 136 +++ .../operators/CpuGemmDirectConv2d.cpp | 145 +++ .../experimental/operators/CpuTranspose.cpp | 66 ++ .../operators/CpuWinogradConv2d.cpp | 141 +++ 728 files changed, 8443 insertions(+), 2102 deletions(-) create mode 100644 arm_compute/runtime/experimental/operators/CpuActivation.h create mode 100644 arm_compute/runtime/experimental/operators/CpuGemm.h create mode 100644 arm_compute/runtime/experimental/operators/CpuGemmConv2d.h create mode 100644 arm_compute/runtime/experimental/operators/CpuGemmDirectConv2d.h create mode 100644 arm_compute/runtime/experimental/operators/CpuTranspose.h create mode 100644 arm_compute/runtime/experimental/operators/CpuWinogradConv2d.h rename src/cpu/kernels/conv3d/{neon/list.h => generic/neon/float_impl.h} (96%) create mode 100644 src/cpu/kernels/conv3d/generic/neon/fp16.cpp create mode 100644 src/cpu/kernels/conv3d/generic/neon/fp32.cpp create mode 100644 src/cpu/kernels/conv3d/generic/neon/qasymm8.cpp create mode 100644 src/cpu/kernels/conv3d/generic/neon/qasymm8_signed.cpp rename src/cpu/kernels/conv3d/{neon/quantized.h => generic/neon/quantized_impl.h} (98%) create mode 100644 src/cpu/kernels/conv3d/list.h create mode 100644 src/runtime/experimental/operators/CpuActivation.cpp create mode 100644 src/runtime/experimental/operators/CpuGemm.cpp create mode 100644 src/runtime/experimental/operators/CpuGemmConv2d.cpp create mode 100644 src/runtime/experimental/operators/CpuGemmDirectConv2d.cpp create mode 100644 src/runtime/experimental/operators/CpuTranspose.cpp create mode 100644 src/runtime/experimental/operators/CpuWinogradConv2d.cpp create mode 100644 tests/validation/fixtures/CpuActivationFixture.h create mode 100644 tests/validation/fixtures/CpuGemmConv2dFixture.h create mode 100644 tests/validation/fixtures/CpuGemmDirectConv2dFixture.h create mode 100644 
tests/validation/fixtures/CpuTransposeFixture.h create mode 100644 tests/validation/fixtures/CpuWinogradConv2dFixture.h create mode 100644 tests/validation/runtime/experimental/operators/CpuActivation.cpp create mode 100644 tests/validation/runtime/experimental/operators/CpuGemm.cpp create mode 100644 tests/validation/runtime/experimental/operators/CpuGemmConv2d.cpp create mode 100644 tests/validation/runtime/experimental/operators/CpuGemmDirectConv2d.cpp create mode 100644 tests/validation/runtime/experimental/operators/CpuTranspose.cpp create mode 100644 tests/validation/runtime/experimental/operators/CpuWinogradConv2d.cpp diff --git a/Android.bp b/Android.bp index 1f1e591bd1..ea536a5ab2 100644 --- a/Android.bp +++ b/Android.bp @@ -479,6 +479,10 @@ cc_library_static { "src/cpu/kernels/boundingboxtransform/generic/neon/impl.cpp", "src/cpu/kernels/boundingboxtransform/generic/neon/qsymm16.cpp", "src/cpu/kernels/cast/generic/neon/fp16.cpp", + "src/cpu/kernels/conv3d/generic/neon/fp16.cpp", + "src/cpu/kernels/conv3d/generic/neon/fp32.cpp", + "src/cpu/kernels/conv3d/generic/neon/qasymm8.cpp", + "src/cpu/kernels/conv3d/generic/neon/qasymm8_signed.cpp", "src/cpu/kernels/crop/generic/neon/fp16.cpp", "src/cpu/kernels/crop/generic/neon/fp32.cpp", "src/cpu/kernels/crop/generic/neon/integer.cpp", @@ -1010,6 +1014,12 @@ cc_library_static { "src/runtime/Tensor.cpp", "src/runtime/TensorAllocator.cpp", "src/runtime/Utils.cpp", + "src/runtime/experimental/operators/CpuActivation.cpp", + "src/runtime/experimental/operators/CpuGemm.cpp", + "src/runtime/experimental/operators/CpuGemmConv2d.cpp", + "src/runtime/experimental/operators/CpuGemmDirectConv2d.cpp", + "src/runtime/experimental/operators/CpuTranspose.cpp", + "src/runtime/experimental/operators/CpuWinogradConv2d.cpp", "src/runtime/heuristics/direct_conv/ClDirectConvDefaultConfigBifrost.cpp", "src/runtime/heuristics/direct_conv/ClDirectConvDefaultConfigValhall.cpp", 
"src/runtime/heuristics/dwc_native/ClDWCNativeDefaultConfigBifrost.cpp", diff --git a/CMakeLists.txt b/CMakeLists.txt index f291534201..2fc74ee118 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -28,7 +28,7 @@ cmake_minimum_required(VERSION 3.13 FATAL_ERROR) list(APPEND CMAKE_MESSAGE_CONTEXT ArmCompute) project( ArmCompute - VERSION 38.0.0 + VERSION 39.0.0 DESCRIPTION "The Arm Compute Library is a collection of low-level machine learning functions optimized for Arm® Cortex®-A CPU and Arm® Mali™ GPU architectures" LANGUAGES C CXX ASM) diff --git a/README.md b/README.md index 02dd05edac..f2f2c636a0 100644 --- a/README.md +++ b/README.md @@ -1,4 +1,3 @@ - > **⚠ Deprecation Notice** > 24.01 announcement: NCHW data format specific optimizations will gradually be removed from the code base in > future releases. The implication of this is that the user is expected to translate NCHW models into NHWC in @@ -9,7 +8,7 @@

-# Compute Library ![](https://img.shields.io/badge/latest_release-24.06-green) +# Compute Library ![](https://img.shields.io/badge/latest_release-24.07-green) The Compute Library is a collection of low-level machine learning functions optimized for Arm® Cortex®-A, Arm® Neoverse® and Arm® Mali™ GPUs architectures.
@@ -37,7 +36,7 @@ Key Features:
## Documentation -[![Documentation](https://img.shields.io/badge/documentation-24.06-green)](https://arm-software.github.io/ComputeLibrary/latest) +[![Documentation](https://img.shields.io/badge/documentation-24.07-green)](https://arm-software.github.io/ComputeLibrary/latest) > Note: The documentation includes the reference API, changelogs, build guide, contribution guide, errata, etc. @@ -50,24 +49,22 @@ All the binaries can be downloaded from [here](https://github.com/ARM-software/C | Platform | Operating System | Release archive (Download) | | -------------- | ---------------- | -------------------------- | -| Raspberry Pi 4 | Linux® 32bit | [![](https://img.shields.io/badge/build-neon-orange)](https://github.com/ARM-software/ComputeLibrary/releases/download/v24.06/arm_compute-v24.06-bin-linux-armv7a-neon.tar.gz) | -| Raspberry Pi 4 | Linux® 64bit | [![](https://img.shields.io/badge/build-neon-orange)](https://github.com/ARM-software/ComputeLibrary/releases/download/v24.06/arm_compute-v24.06-bin-linux-arm64-v8a-neon.tar.gz) | -| Odroid N2 | Linux® 64bit | [![](https://img.shields.io/badge/build-neon-orange)](https://github.com/ARM-software/ComputeLibrary/releases/download/v24.06/arm_compute-v24.06-bin-linux-arm64-v8a-neon.tar.gz) [![](https://img.shields.io/badge/build-opencl-blue)](https://github.com/ARM-software/ComputeLibrary/releases/download/v24.06/arm_compute-v24.06-bin-linux-arm64-v8a-cl.tar.gz) [![](https://img.shields.io/badge/build-neon+cl-yellowgreen)](https://github.com/ARM-software/ComputeLibrary/releases/download/v24.06/arm_compute-v24.06-bin-linux-arm64-v8a-neon-cl.tar.gz) | -| HiKey960 | Linux® 64bit | [![](https://img.shields.io/badge/build-neon-orange)](https://github.com/ARM-software/ComputeLibrary/releases/download/v24.06/arm_compute-v24.06-bin-linux-arm64-v8a-neon.tar.gz) 
[![](https://img.shields.io/badge/build-opencl-blue)](https://github.com/ARM-software/ComputeLibrary/releases/download/v24.06/arm_compute-v24.06-bin-linux-arm64-v8a-cl.tar.gz) [![](https://img.shields.io/badge/build-neon+cl-yellowgreen)](https://github.com/ARM-software/ComputeLibrary/releases/download/v24.06/arm_compute-v24.06-bin-linux-arm64-v8a-neon-cl.tar.gz) | +| Raspberry Pi 4 | Linux® 32bit | [![](https://img.shields.io/badge/build-neon-orange)](https://github.com/ARM-software/ComputeLibrary/releases/download/v24.07/arm_compute-v24.07-linux-armv7a-cpu-bin.tar.gz) | +| Raspberry Pi 4 | Linux® 64bit | [![](https://img.shields.io/badge/build-neon-orange)](https://github.com/ARM-software/ComputeLibrary/releases/download/v24.07/arm_compute-v24.07-linux-aarch64-cpu-bin.tar.gz) | +| Odroid N2 | Linux® 64bit | [![](https://img.shields.io/badge/build-neon-orange)](https://github.com/ARM-software/ComputeLibrary/releases/download/v24.07/arm_compute-v24.07-linux-aarch64-cpu-bin.tar.gz) [![](https://img.shields.io/badge/build-neon+cl-yellowgreen)](https://github.com/ARM-software/ComputeLibrary/releases/download/v24.07/arm_compute-v24.07-linux-aarch64-cpu-gpu-bin.tar.gz) | +| HiKey960 | Linux® 64bit | [![](https://img.shields.io/badge/build-neon-orange)](https://github.com/ARM-software/ComputeLibrary/releases/download/v24.07/arm_compute-v24.07-linux-aarch64-cpu-bin.tar.gz) [![](https://img.shields.io/badge/build-neon+cl-yellowgreen)](https://github.com/ARM-software/ComputeLibrary/releases/download/v24.07/arm_compute-v24.07-linux-aarch64-cpu-gpu-bin.tar.gz) |
| Architecture | Operating System | Release archive (Download) | | ------------ | ---------------- | -------------------------- | -| armv7 | Linux® | [![](https://img.shields.io/badge/build-neon-orange)](https://github.com/ARM-software/ComputeLibrary/releases/download/v24.06/arm_compute-v24.06-bin-linux-armv7a-neon.tar.gz) [![](https://img.shields.io/badge/build-opencl-blue)](https://github.com/ARM-software/ComputeLibrary/releases/download/v24.06/arm_compute-v24.06-bin-linux-armv7a-cl.tar.gz) [![](https://img.shields.io/badge/build-neon+cl-yellowgreen)](https://github.com/ARM-software/ComputeLibrary/releases/download/v24.06/arm_compute-v24.06-bin-linux-armv7a-neon-cl.tar.gz) | -| arm64-v8a | Android™ | [![](https://img.shields.io/badge/build-neon-orange)](https://github.com/ARM-software/ComputeLibrary/releases/download/v24.06/arm_compute-v24.06-bin-android-arm64-v8a-neon.tar.gz) [![](https://img.shields.io/badge/build-opencl-blue)](https://github.com/ARM-software/ComputeLibrary/releases/download/v24.06/arm_compute-v24.06-bin-android-arm64-v8a-cl.tar.gz) [![](https://img.shields.io/badge/build-neon+cl-yellowgreen)](https://github.com/ARM-software/ComputeLibrary/releases/download/v24.06/arm_compute-v24.06-bin-android-arm64-v8a-neon-cl.tar.gz) | -| arm64-v8a | Linux® | [![](https://img.shields.io/badge/build-neon-orange)](https://github.com/ARM-software/ComputeLibrary/releases/download/v24.06/arm_compute-v24.06-bin-linux-arm64-v8a-neon.tar.gz) [![](https://img.shields.io/badge/build-opencl-blue)](https://github.com/ARM-software/ComputeLibrary/releases/download/v24.06/arm_compute-v24.06-bin-linux-arm64-v8a-cl.tar.gz) [![](https://img.shields.io/badge/build-neon+cl-yellowgreen)](https://github.com/ARM-software/ComputeLibrary/releases/download/v24.06/arm_compute-v24.06-bin-linux-arm64-v8a-neon-cl.tar.gz) | -| arm64-v8.2-a | Android™ | 
[![](https://img.shields.io/badge/build-neon-orange)](https://github.com/ARM-software/ComputeLibrary/releases/download/v24.06/arm_compute-v24.06-bin-android-arm64-v8.2-a-neon.tar.gz) [![](https://img.shields.io/badge/build-opencl-blue)](https://github.com/ARM-software/ComputeLibrary/releases/download/v24.06/arm_compute-v24.06-bin-android-arm64-v8.2-a-cl.tar.gz) [![](https://img.shields.io/badge/build-neon+cl-yellowgreen)](https://github.com/ARM-software/ComputeLibrary/releases/download/v24.06/arm_compute-v24.06-bin-android-arm64-v8.2-a-neon-cl.tar.gz) | -| arm64-v8.2-a | Linux® | [![](https://img.shields.io/badge/build-neon-orange)](https://github.com/ARM-software/ComputeLibrary/releases/download/v24.06/arm_compute-v24.06-bin-linux-arm64-v8.2-a-neon.tar.gz) [![](https://img.shields.io/badge/build-opencl-blue)](https://github.com/ARM-software/ComputeLibrary/releases/download/v24.06/arm_compute-v24.06-bin-linux-arm64-v8.2-a-cl.tar.gz) [![](https://img.shields.io/badge/build-neon+cl-yellowgreen)](https://github.com/ARM-software/ComputeLibrary/releases/download/v24.06/arm_compute-v24.06-bin-linux-arm64-v8.2-a-neon-cl.tar.gz) | +| armv7 | Linux® | [![](https://img.shields.io/badge/build-neon-orange)](https://github.com/ARM-software/ComputeLibrary/releases/download/v24.07/arm_compute-v24.07-linux-armv7a-cpu-bin.tar.gz) [![](https://img.shields.io/badge/build-neon+cl-yellowgreen)](https://github.com/ARM-software/ComputeLibrary/releases/download/v24.07/arm_compute-v24.07-linux-armv7a-cpu-gpu-bin.tar.gz) | +| arm64-v8a | Android™ | [![](https://img.shields.io/badge/build-neon-orange)](https://github.com/ARM-software/ComputeLibrary/releases/download/v24.07/arm_compute-v24.07-android-aarch64-cpu-bin.tar.gz) [![](https://img.shields.io/badge/build-neon+cl-yellowgreen)](https://github.com/ARM-software/ComputeLibrary/releases/download/v24.07/arm_compute-v24.07-android-aarch64-cpu-gpu-bin.tar.gz) | +| arm64-v8a | Linux® | 
[![](https://img.shields.io/badge/build-neon-orange)](https://github.com/ARM-software/ComputeLibrary/releases/download/v24.07/arm_compute-v24.07-linux-aarch64-cpu-bin.tar.gz) [![](https://img.shields.io/badge/build-neon+cl-yellowgreen)](https://github.com/ARM-software/ComputeLibrary/releases/download/v24.07/arm_compute-v24.07-linux-aarch64-cpu-gpu-bin.tar.gz) |
-Please refer to the following link for more pre-built binaries: [![](https://img.shields.io/badge/v24.06-bins-yellowgreen)](https://github.com/ARM-software/ComputeLibrary/releases/tag/v24.06) +Please refer to the following link for more pre-built binaries: [![](https://img.shields.io/badge/v24.07-bins-yellowgreen)](https://github.com/ARM-software/ComputeLibrary/releases/tag/v24.07) Pre-build binaries are generated with the following security / good coding practices related flags: > -Wall, -Wextra, -Wformat=2, -Winit-self, -Wstrict-overflow=2, -Wswitch-default, -Woverloaded-virtual, -Wformat-security, -Wctor-dtor-privacy, -Wsign-promo, -Weffc++, -pedantic, -fstack-protector-strong diff --git a/SConscript b/SConscript index 325506ed40..76ed5c2d6b 100644 --- a/SConscript +++ b/SConscript @@ -31,9 +31,10 @@ import zlib import json import codecs import platform +import SCons -VERSION = "v24.06" -LIBRARY_VERSION_MAJOR = 38 +VERSION = "v24.07" +LIBRARY_VERSION_MAJOR = 39 LIBRARY_VERSION_MINOR = 0 LIBRARY_VERSION_PATCH = 0 SONAME_VERSION = str(LIBRARY_VERSION_MAJOR) + "." + str(LIBRARY_VERSION_MINOR) + "." 
+ str(LIBRARY_VERSION_PATCH) @@ -151,6 +152,33 @@ def get_ckw_obj_list(): def build_library(name, build_env, sources, static=False, libs=[]): cloned_build_env = build_env.Clone() + + #The following setup only works on posix systems; RANLIBCOM isn't available on win32 HOST_OS + if cloned_build_env['HOST_OS'] == 'posix': + #Set up to use temp file for long command when building and linking libraries + cloned_build_env['TEMPFILE'] = SCons.Platform.TempFileMunge + + #To use temp file for any command, the following pattern should be used: + # env['COMMAND'] = "${TEMPFILE('$COMMANDSTRING')}" + #See: https://github.com/SCons/scons/blob/05f2992377844bbfec9bcd4a9c7f5479c634b91b/SCons/Platform/__init__.py#L147 + #The command strings are taken from https://github.com/SCons/scons + #The commands' explanations are taken from the SCons user guide + + #The command line used to compile C++ source file to an object file + cloned_build_env['CXXCOM'] = "${TEMPFILE('"+ cloned_build_env['CXXCOM'] + "')}" + #The command line used to compile C++ source file to a shared-library object file + cloned_build_env['SHCXXCOM'] = "${TEMPFILE('"+ cloned_build_env['SHCXXCOM'] + "')}" + #The command line used to generate a static library from object files + cloned_build_env['ARCOM'] = "${TEMPFILE('"+ cloned_build_env['ARCOM'] + "')}" + #The command line used to link object files into an executable + cloned_build_env['LINKCOM'] = "${TEMPFILE('"+ cloned_build_env['LINKCOM'] + "')}" + #The command line used to link programs using shared libraries + cloned_build_env['SHLINKCOM'] = "${TEMPFILE('"+ cloned_build_env['SHLINKCOM'] + "')}" + #The command line used to index a static library archive + cloned_build_env['RANLIBCOM'] = "${TEMPFILE('"+ cloned_build_env['RANLIBCOM'] + "')}" + #Set up directory for temp files. 
To prevent permission issue, the temp files are in the same directory with output files + cloned_build_env['TEMPFILEDIR'] = cloned_build_env['build_dir'] + if env['os'] == 'android' and static == False: cloned_build_env["LINKFLAGS"].remove('-pie') cloned_build_env["LINKFLAGS"].remove('-static-libstdc++') diff --git a/SConstruct b/SConstruct index bad85e503d..941f173d3d 100644 --- a/SConstruct +++ b/SConstruct @@ -66,7 +66,7 @@ def update_data_type_layout_flags(env, data_types, data_layouts): if any(i in data_types for i in ['all', 'fp16']): env.Append(CXXFLAGS = ['-DENABLE_FP16_KERNELS', '-DARM_COMPUTE_ENABLE_FP16']) else: - if not 'v8a' in env['arch'] and not 'v7a' in env['arch'] and not 'armv8r64' in env['arch']: + if not 'v8a' in env['arch'] and not 'v7a' in env['arch'] and not 'armv8r64' in env['arch'] and not 'x86' in env['arch']: if any(i in data_types for i in ['all', 'fp16']): env.Append(CXXFLAGS = ['-DENABLE_FP16_KERNELS','-DARM_COMPUTE_ENABLE_FP16']) diff --git a/arm_compute/runtime/CL/functions/CLLSTMLayer.h b/arm_compute/runtime/CL/functions/CLLSTMLayer.h index fe494991af..0d98cbafc1 100644 --- a/arm_compute/runtime/CL/functions/CLLSTMLayer.h +++ b/arm_compute/runtime/CL/functions/CLLSTMLayer.h @@ -1,5 +1,5 @@ /* - * Copyright (c) 2018-2021 Arm Limited. + * Copyright (c) 2018-2021, 2024 Arm Limited. * * SPDX-License-Identifier: MIT * @@ -21,8 +21,8 @@ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE * SOFTWARE. 
*/ -#ifndef ARM_COMPUTE_CLLSTMLAYER_H -#define ARM_COMPUTE_CLLSTMLAYER_H +#ifndef ACL_ARM_COMPUTE_RUNTIME_CL_FUNCTIONS_CLLSTMLAYER_H +#define ACL_ARM_COMPUTE_RUNTIME_CL_FUNCTIONS_CLLSTMLAYER_H #include "arm_compute/core/Types.h" #include "arm_compute/runtime/CL/CLTensor.h" @@ -322,7 +322,6 @@ class CLLSTMLayer : public IFunction CLTensor _forget_gate_out3; CLTensor _forget_gate_out4; CLTensor _forget_gate_out5; - CLTensor _forget_gate_out6; CLTensor _cell_state_out1; CLTensor _cell_state_out2; CLTensor _cell_state_out3; @@ -353,4 +352,4 @@ class CLLSTMLayer : public IFunction const ICLTensor *_recurrent_to_cell_weights{nullptr}; }; } // namespace arm_compute -#endif /* ARM_COMPUTE_CLLSTMLAYER_H */ +#endif // ACL_ARM_COMPUTE_RUNTIME_CL_FUNCTIONS_CLLSTMLAYER_H diff --git a/arm_compute/runtime/NEON/functions/NEConvolutionLayer.h b/arm_compute/runtime/NEON/functions/NEConvolutionLayer.h index 2d07980ade..83261ce714 100644 --- a/arm_compute/runtime/NEON/functions/NEConvolutionLayer.h +++ b/arm_compute/runtime/NEON/functions/NEConvolutionLayer.h @@ -1,5 +1,5 @@ /* - * Copyright (c) 2018-2021, 2023 Arm Limited. + * Copyright (c) 2018-2021, 2023-2024 Arm Limited. * * SPDX-License-Identifier: MIT * diff --git a/arm_compute/runtime/NEON/functions/NELSTMLayer.h b/arm_compute/runtime/NEON/functions/NELSTMLayer.h index 629c5d10a0..8416111881 100644 --- a/arm_compute/runtime/NEON/functions/NELSTMLayer.h +++ b/arm_compute/runtime/NEON/functions/NELSTMLayer.h @@ -1,5 +1,5 @@ /* - * Copyright (c) 2018-2021 Arm Limited. + * Copyright (c) 2018-2021, 2024 Arm Limited. * * SPDX-License-Identifier: MIT * @@ -21,8 +21,8 @@ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE * SOFTWARE. 
*/ -#ifndef ARM_COMPUTE_NELSTMLAYER_H -#define ARM_COMPUTE_NELSTMLAYER_H +#ifndef ACL_ARM_COMPUTE_RUNTIME_NEON_FUNCTIONS_NELSTMLAYER_H +#define ACL_ARM_COMPUTE_RUNTIME_NEON_FUNCTIONS_NELSTMLAYER_H #include "arm_compute/core/Types.h" #include "arm_compute/runtime/common/LSTMParams.h" @@ -245,7 +245,6 @@ class NELSTMLayer : public IFunction Tensor _forget_gate_out3; Tensor _forget_gate_out4; Tensor _forget_gate_out5; - Tensor _forget_gate_out6; Tensor _cell_state_out1; Tensor _cell_state_out2; Tensor _cell_state_out3; @@ -275,4 +274,4 @@ class NELSTMLayer : public IFunction bool _is_layer_norm_lstm; }; } // namespace arm_compute -#endif /* ARM_COMPUTE_NELSTMLAYER_H */ +#endif // ACL_ARM_COMPUTE_RUNTIME_NEON_FUNCTIONS_NELSTMLAYER_H diff --git a/arm_compute/runtime/experimental/operators/CpuActivation.h b/arm_compute/runtime/experimental/operators/CpuActivation.h new file mode 100644 index 0000000000..274823acaf --- /dev/null +++ b/arm_compute/runtime/experimental/operators/CpuActivation.h @@ -0,0 +1,78 @@ +/* + * Copyright (c) 2024 Arm Limited. + * + * SPDX-License-Identifier: MIT + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal in the Software without restriction, including without limitation the + * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + * sell copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 
IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + */ + +#ifndef ACL_ARM_COMPUTE_RUNTIME_EXPERIMENTAL_OPERATORS_CPUACTIVATION_H +#define ACL_ARM_COMPUTE_RUNTIME_EXPERIMENTAL_OPERATORS_CPUACTIVATION_H + +#include "arm_compute/core/ITensorPack.h" +#include "arm_compute/core/TensorInfo.h" + +namespace arm_compute +{ +namespace experimental +{ +namespace op +{ +/** Wrapper class for CpuActivation. For information on the functions, + * see "src/cpu/operators/CpuActivation.h" +*/ +class CpuActivation +{ +public: + /** Constructor **/ + CpuActivation(); + /** Prevent instances of this class from being copied (As this class contains pointers) */ + CpuActivation(const CpuActivation &) = delete; + /** Default move constructor */ + CpuActivation(CpuActivation &&) = default; + /** Default destructor */ + ~CpuActivation(); + + /** Configure operator for a given list of arguments + * + * @param[in] src Source tensor info. Data types supported: QASYMM8/QASYMM8_SIGNED/QSYMM16/F16/F32. + * @param[out] dst Destination tensor info. Data type supported: same as @p src + * @param[in] act_info Activation layer parameters. 
+ */ + void configure(const ITensorInfo *src, ITensorInfo *dst, const ActivationLayerInfo &act_info); + + /** Static function to check if given info will lead to a valid configuration + * + * Similar to @ref CpuActivation::configure() + * + * @return a status + */ + static Status validate(const ITensorInfo *src, const ITensorInfo *dst, const ActivationLayerInfo &act_info); + + void run(ITensorPack &tensors); + +private: + struct Impl; + std::unique_ptr<Impl> _impl; +}; +} // namespace op +} // namespace experimental +} // namespace arm_compute + +#endif // ACL_ARM_COMPUTE_RUNTIME_EXPERIMENTAL_OPERATORS_CPUACTIVATION_H diff --git a/arm_compute/runtime/experimental/operators/CpuGemm.h b/arm_compute/runtime/experimental/operators/CpuGemm.h new file mode 100644 index 0000000000..e397cbf006 --- /dev/null +++ b/arm_compute/runtime/experimental/operators/CpuGemm.h @@ -0,0 +1,134 @@ +/* + * Copyright (c) 2024 Arm Limited. + * + * SPDX-License-Identifier: MIT + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal in the Software without restriction, including without limitation the + * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + * sell copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 
IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + */ + +#ifndef ACL_ARM_COMPUTE_RUNTIME_EXPERIMENTAL_OPERATORS_CPUGEMM_H +#define ACL_ARM_COMPUTE_RUNTIME_EXPERIMENTAL_OPERATORS_CPUGEMM_H + +#include "arm_compute/core/ITensorPack.h" +#include "arm_compute/core/TensorInfo.h" +#include "arm_compute/function_info/GEMMInfo.h" +#include "arm_compute/runtime/IOperator.h" + +/* + * A shallow wrapper for arm_compute::cpu::CpuGemm. + * Any new features should be added to arm_compute::cpu::CpuGemm and + * arm_compute::experimental::ops::CpuGemm should remain a shallow wrapper. +*/ + +namespace arm_compute +{ +namespace experimental +{ +namespace ops +{ +/** Wrapper class for CpuGemm. For information on the operators, + * see "src/cpu/operators/CpuGemm.h" +*/ +class CpuGemm : IOperator +{ +public: + /** Constructor **/ + CpuGemm(); + /** Prevent instances of this class from being copied (As this class contains pointers) */ + CpuGemm(const CpuGemm &) = delete; + /** Default move constructor */ + CpuGemm(CpuGemm &&) = default; + /** Default destructor */ + ~CpuGemm(); + + /** Configure operator for a given list of arguments + * + * Valid data layouts: + * - All + * + * Valid data type configurations: + * |a |b |c |d | + * |:------------|:-----------|:---------|:--------------| + * |F32 |F32 |F32 |F32 | + * |F16 |F16 |F16 |F16 | + * |BFLOAT16 |BFLOAT16 |BFLOAT16 |FP32 | + * + * @note GEMM: General Matrix Multiply - [alpha * A * B + beta * C]. + * @note GEMM: The tensors a, b, c, d must have the same data type. You should not mix data types when calling this function. + * + * @note Batched GEMM only supports broadcasting cases where RHS rank < LHS rank but not the other way around + * + * @param[in] a First input tensor info (Matrix A or Vector A). 
Data type supported: BFLOAT16/F16/F32 + * @param[in] b Second input tensor info (Matrix B). Data type supported: same as @p a + * @param[in] c Third input tensor info (Matrix C). It can be a nullptr if just the multiplication between @p a and @p b is needed. Data type supported: same as @p a + * @param[out] d Output tensor info. Data type supported: same as @p a + * @param[in] alpha Weight of the matrix product + * @param[in] beta Weight of matrix C + * @param[in] gemm_info (Optional) Specifies if the matrix A and/or matrix B have been reshaped and + * if the reshape of matrix B should happen only for the first run + */ + void configure(const ITensorInfo *a, + const ITensorInfo *b, + const ITensorInfo *c, + ITensorInfo *d, + float alpha, + float beta, + const GEMMInfo &gemm_info = GEMMInfo()); + + /** Static function to check if given info will lead to a valid configuration of @ref CpuGemm. + * + * Similar to @ref CpuGemm::configure() + * + * @return a status + */ + static Status validate(const ITensorInfo *a, + const ITensorInfo *b, + const ITensorInfo *c, + const ITensorInfo *d, + float alpha, + float beta, + const GEMMInfo &gemm_info = GEMMInfo()); + + /** Indicates whether or not there is an optimal assembly implementation that can be used to process the given parameters. + * + * This method has the same use as @ref + * NEGEMMConvolutionLayer::has_opt_impl, with the only caveat that + * the value of arm_compute::WeightFormat needs to be passed via the + * parameter gemm_info. 
+ */ + static Status has_opt_impl(arm_compute::WeightFormat &weight_format, + const ITensorInfo *a, + const ITensorInfo *b, + const ITensorInfo *c, + const ITensorInfo *d, + const GEMMInfo &gemm_info = GEMMInfo()); + + void run(ITensorPack &tensors); + void prepare(ITensorPack &constants); + experimental::MemoryRequirements workspace() const; + +private: + struct Impl; + std::unique_ptr<Impl> _impl; +}; +} // namespace ops +} // namespace experimental +} // namespace arm_compute + +#endif // ACL_ARM_COMPUTE_RUNTIME_EXPERIMENTAL_OPERATORS_CPUGEMM_H diff --git a/arm_compute/runtime/experimental/operators/CpuGemmConv2d.h b/arm_compute/runtime/experimental/operators/CpuGemmConv2d.h new file mode 100644 index 0000000000..89749e3f25 --- /dev/null +++ b/arm_compute/runtime/experimental/operators/CpuGemmConv2d.h @@ -0,0 +1,151 @@ +/* + * Copyright (c) 2021-2024 Arm Limited. + * + * SPDX-License-Identifier: MIT + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal in the Software without restriction, including without limitation the + * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + * sell copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 
IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + */ +#ifndef ACL_ARM_COMPUTE_RUNTIME_EXPERIMENTAL_OPERATORS_CPUGEMMCONV2D_H +#define ACL_ARM_COMPUTE_RUNTIME_EXPERIMENTAL_OPERATORS_CPUGEMMCONV2D_H + +#include "arm_compute/core/ITensorPack.h" +#include "arm_compute/core/TensorInfo.h" +#include "arm_compute/core/Types.h" +#include "arm_compute/runtime/IOperator.h" + +#include <memory> + +namespace arm_compute +{ +namespace experimental +{ +namespace op +{ +/* + * A shallow wrapper for arm_compute::cpu::CpuGemmConv2d. + * Any new features should be added to arm_compute::cpu::CpuGemmConv2d and + * arm_compute::experimental::op::CpuGemmConv2d should remain a shallow wrapper. +*/ +class CpuGemmConv2d : IOperator +{ +public: + /** Constructor */ + CpuGemmConv2d(); + /** Prevent instances of this class from being copied (As this class contains pointers) */ + CpuGemmConv2d(const CpuGemmConv2d &) = delete; + /** Prevent instances of this class from being moved (As this class contains non movable objects) */ + CpuGemmConv2d(CpuGemmConv2d &&) = delete; + /** Prevent instances of this class from being copied (As this class contains pointers) */ + CpuGemmConv2d &operator=(const CpuGemmConv2d &) = delete; + /** Prevent instances of this class from being moved (As this class contains non movable objects) */ + CpuGemmConv2d &operator=(CpuGemmConv2d &&) = delete; + /** Destructor */ + ~CpuGemmConv2d(); + /** Set the input and output tensors. 
+ * + * Valid data layouts: + * - NHWC + * - NCHW + * + * Valid data type configurations: + * |src0 |src1 |src2 |dst | + * |:--------------|:------------------|:--------|:--------------| + * |F16 |F16 |F16 |F16 | + * |F32 |F32 |F32 |F32 | + * |BFLOAT16 |BFLOAT16 |BFLOAT16 |BFLOAT16 | + * |QASYMM8 |QASYMM8 |S32 |QASYMM8 | + * |QASYMM8 |QASYMM8_SIGNED |S32 |QASYMM8 | + * |QASYMM8 |QSYMM8_PER_CHANNEL |S32 |QASYMM8 | + * |QASYMM8_SIGNED |QASYMM8_SIGNED |S32 |QASYMM8_SIGNED | + * |QASYMM8_SIGNED |QSYMM8_PER_CHANNEL |S32 |QASYMM8_SIGNED | + * + * @param[in] src Source tensor info. 3 lower dimensions represent a single input [width, height, IFM], + * while every optional dimension from 4 and above represents a batch of inputs. + * Data types supported: QASYMM8/QASYMM8_SIGNED/BFLOAT16/F16/F32. + * @param[in] weights Weights tensor info. Weights are a 4D tensor with dimensions [kernel_x, kernel_y, IFM, OFM]. + * Data type supported: QASYMM8/QASYMM8_SIGNED/QSYMM8_PER_CHANNEL/BFLOAT16/F16/F32. + * @param[in] biases Biases tensor info. Shared biases supported. Biases are a 1D tensor with dimensions [OFM]. + * Data type supported: Should match @p src data type, except for @p src of QASYMM8/QASYMM8_SIGNED type where biases should be of S32 type. + * @param[out] dst Destination tensor info. 3 lower dimensions represent a single output [width, height, OFM], while the rest represent batch of outputs. + * Data types supported: Same as @p src. + * @param[in] conv_info Contains padding and stride information described in @ref PadStrideInfo. + * @param[in] weights_info (Optional) Specifies if the weights tensor has been reshaped with CpuWeightsReshapeKernel. If this is not part of the fully connected layer the weights + * tensor has also been transposed with cpu::kernels::CpuGemmTranspose1xWKernel. Data type supported: Same as @p src. + * @param[in] dilation (Optional) Dilation, in elements, across x and y. Defaults to (1, 1). 
+ * @param[in] act_info (Optional) Activation layer information in case of a fused activation. Only RELU, BOUNDED_RELU and LU_BOUNDED_RELU supported. + * @param[in] enable_fast_math (Optional) Enable fast math computation. If this flag is set, the function may dispatch the fastest implementation + * available, which may introduce a drop in accuracy as well. Default is false + * @param[in] num_groups (Optional) Number of groups when performing a grouped convolution. num_groups != 1 is not supported + */ + void configure(const ITensorInfo *src, + const ITensorInfo *weights, + const ITensorInfo *biases, + ITensorInfo *dst, + const PadStrideInfo &conv_info, + const WeightsInfo &weights_info = WeightsInfo(), + const Size2D &dilation = Size2D(1U, 1U), + const ActivationLayerInfo &act_info = ActivationLayerInfo(), + bool enable_fast_math = false, + unsigned int num_groups = 1); + /** Static function to check if given info will lead to a valid configuration + * + * Similar to CpuGemmConv2d::configure() + * + * @return a status + */ + static Status validate(const ITensorInfo *src, + const ITensorInfo *weights, + const ITensorInfo *biases, + const ITensorInfo *output, + const PadStrideInfo &conv_info, + const WeightsInfo &weights_info = WeightsInfo(), + const Size2D &dilation = Size2D(1U, 1U), + const ActivationLayerInfo &act_info = ActivationLayerInfo(), + bool enable_fast_math = false, + unsigned int num_groups = 1); + + /** Indicates whether or not there is an optimal assembly implementation that can be used to process the given parameters. + * + * The parameter list is the same as @ref NEGEMMConvolutionLayer::has_opt_impl + * + * @return a status. 
+ */ + static Status has_opt_impl(arm_compute::WeightFormat &expected_weight_format, + const ITensorInfo *src, + const ITensorInfo *weights, + const ITensorInfo *biases, + const ITensorInfo *output, + const PadStrideInfo &conv_info, + const WeightsInfo &weights_info = WeightsInfo(), + const Size2D &dilation = Size2D(1U, 1U), + const ActivationLayerInfo &act_info = ActivationLayerInfo(), + const bool enable_fast_math = false); + + void run(ITensorPack &tensors) override; + void prepare(ITensorPack &tensors) override; + experimental::MemoryRequirements workspace() const override; + +private: + struct Impl; + std::unique_ptr<Impl> _impl; +}; +} // namespace op +} // namespace experimental +} // namespace arm_compute +#endif // ACL_ARM_COMPUTE_RUNTIME_EXPERIMENTAL_OPERATORS_CPUGEMMCONV2D_H diff --git a/arm_compute/runtime/experimental/operators/CpuGemmDirectConv2d.h b/arm_compute/runtime/experimental/operators/CpuGemmDirectConv2d.h new file mode 100644 index 0000000000..b7f1548245 --- /dev/null +++ b/arm_compute/runtime/experimental/operators/CpuGemmDirectConv2d.h @@ -0,0 +1,111 @@ +/* + * Copyright (c) 2024 Arm Limited. + * + * SPDX-License-Identifier: MIT + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal in the Software without restriction, including without limitation the + * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + * sell copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 
IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + */ + +#ifndef ACL_ARM_COMPUTE_RUNTIME_EXPERIMENTAL_OPERATORS_CPUGEMMDIRECTCONV2D_H +#define ACL_ARM_COMPUTE_RUNTIME_EXPERIMENTAL_OPERATORS_CPUGEMMDIRECTCONV2D_H + +#include "arm_compute/core/ITensorPack.h" +#include "arm_compute/core/TensorInfo.h" +#include "arm_compute/runtime/FunctionDescriptors.h" +#include "arm_compute/runtime/IOperator.h" + +namespace arm_compute +{ +namespace experimental +{ +namespace op +{ +/* + * A shallow wrapper for arm_compute::cpu::CpuGemmDirectConv2d. + * Any new features should be added to arm_compute::cpu::CpuGemmDirectConv2d and + * arm_compute::experimental::op::CpuGemmDirectConv2d should remain a shallow wrapper. +*/ +class CpuGemmDirectConv2d : public IOperator +{ +public: + /** Constructor **/ + CpuGemmDirectConv2d(); + /** Prevent instances of this class from being copied (As this class contains pointers) */ + CpuGemmDirectConv2d(const CpuGemmDirectConv2d &) = delete; + /** Default move constructor */ + CpuGemmDirectConv2d(CpuGemmDirectConv2d &&) = default; + /** Default destructor */ + ~CpuGemmDirectConv2d(); + + /** Set the input and output tensors. + * + * Valid data layouts: + * - All + * + * Valid data type configurations: + * |src0 |src1 |src2 |dst | + * |:--------------|:--------------|:--------------|:--------------| + * |QASYMM8 |QASYMM8 |S32 |QASYMM8 | + * |QASYMM8_SIGNED |QASYMM8_SIGNED |S32 |QASYMM8_SIGNED | + * |F16 |F16 |F16 |F16 | + * |F32 |F32 |F32 |F32 | + * |BFLOAT16 |BFLOAT16 |BFLOAT16 |BFLOAT16 | + * + * @param[in] src Source tensor info. 3 lower dimensions represent a single input [width, height, IFM], + * while every optional dimension from 4 and above represent a batch of inputs.
+ * Data types supported: QASYMM8/QASYMM8_SIGNED/BFLOAT16/F16/F32. + * @param[in] weights Weights tensor info. Weights are 4D tensor with dimensions [kernel_x, kernel_y, IFM, OFM]. + * Data type supported: QASYMM8/QASYMM8_SIGNED/QSYMM8_PER_CHANNEL/BFLOAT16/F16/F32. + * @param[in] biases Biases tensor info. Shared biases supported. Biases are 1D tensor with dimensions [OFM]. + * Data type supported: Should match @p src data type, except for input of QASYMM8/QASYMM8_SIGNED type where biases should be of S32 type. + * @param[out] dst Destination tensor info. 3 lower dimensions represent a single output [width, height, OFM], while the rest represent batch of outputs. + * Data types supported: Same as @p src. + * @param[in] info Contains padding and stride information described in @ref PadStrideInfo. + */ + void configure(const ITensorInfo *src, + const ITensorInfo *weights, + const ITensorInfo *biases, + ITensorInfo *dst, + const Conv2dInfo &info); + /** Static function to check if given info will lead to a valid configuration of @ref CpuGemmDirectConv2d + * + * Similar to CpuGemmDirectConv2d::configure() + * + * @return a status + */ + static Status validate(const ITensorInfo *src, + const ITensorInfo *weights, + const ITensorInfo *biases, + const ITensorInfo *dst, + const Conv2dInfo &info); + + // Inherited methods overridden: + void run(ITensorPack &tensors) override; + void prepare(ITensorPack &constants) override; + experimental::MemoryRequirements workspace() const override; + +private: + struct Impl; + std::unique_ptr<Impl> _impl; +}; +} // namespace op +} // namespace experimental +} // namespace arm_compute + +#endif // ACL_ARM_COMPUTE_RUNTIME_EXPERIMENTAL_OPERATORS_CPUGEMMDIRECTCONV2D_H diff --git a/arm_compute/runtime/experimental/operators/CpuTranspose.h b/arm_compute/runtime/experimental/operators/CpuTranspose.h new file mode 100644 index 0000000000..be8ba0085f --- /dev/null +++ b/arm_compute/runtime/experimental/operators/CpuTranspose.h @@ -0,0 +1,76 @@ +/*
+ * Copyright (c) 2024 Arm Limited. + * + * SPDX-License-Identifier: MIT + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal in the Software without restriction, including without limitation the + * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + * sell copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + */ + +#ifndef ACL_ARM_COMPUTE_RUNTIME_EXPERIMENTAL_OPERATORS_CPUTRANSPOSE_H +#define ACL_ARM_COMPUTE_RUNTIME_EXPERIMENTAL_OPERATORS_CPUTRANSPOSE_H + +#include "arm_compute/core/ITensorPack.h" +#include "arm_compute/core/TensorInfo.h" + +namespace arm_compute +{ +namespace experimental +{ +namespace op +{ +/** Wrapper class for CpuTranspose. 
For information on the functions, + * see "src/cpu/operators/CpuTranspose.h" +*/ +class CpuTranspose +{ +public: + /** Constructor **/ + CpuTranspose(); + /** Prevent instances of this class from being copied (As this class contains pointers) */ + CpuTranspose(const CpuTranspose &) = delete; + /** Default move constructor */ + CpuTranspose(CpuTranspose &&) = default; + /** Default destructor */ + ~CpuTranspose(); + + /** Configure kernel for a given list of arguments + * + * @param[in] src Source tensor to transpose. Data types supported: All + * @param[out] dst Destination tensor. Data types supported: Same as @p src + */ + void configure(const ITensorInfo *src, ITensorInfo *dst); + /** Static function to check if given info will lead to a valid configuration + * + * Similar to CpuTransposeKernel::configure() + * + * @return a status + */ + static Status validate(const ITensorInfo *src, const ITensorInfo *dst); + + void run(ITensorPack &tensors); + +private: + struct Impl; + std::unique_ptr<Impl> _impl; +}; +} // namespace op +} // namespace experimental +} // namespace arm_compute + +#endif // ACL_ARM_COMPUTE_RUNTIME_EXPERIMENTAL_OPERATORS_CPUTRANSPOSE_H diff --git a/arm_compute/runtime/experimental/operators/CpuWinogradConv2d.h b/arm_compute/runtime/experimental/operators/CpuWinogradConv2d.h new file mode 100644 index 0000000000..eb0e1d4f8c --- /dev/null +++ b/arm_compute/runtime/experimental/operators/CpuWinogradConv2d.h @@ -0,0 +1,119 @@ +/* + * Copyright (c) 2021-2024 Arm Limited.
+ * + * SPDX-License-Identifier: MIT + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal in the Software without restriction, including without limitation the + * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + * sell copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + */ +#ifndef ACL_ARM_COMPUTE_RUNTIME_EXPERIMENTAL_OPERATORS_CPUWINOGRADCONV2D_H +#define ACL_ARM_COMPUTE_RUNTIME_EXPERIMENTAL_OPERATORS_CPUWINOGRADCONV2D_H + +#include "arm_compute/runtime/IOperator.h" + +#include "src/cpu/operators/CpuWinogradConv2d.h" + +#include <memory> + +namespace arm_compute +{ +namespace experimental +{ +namespace op +{ +/* + * A shallow wrapper for arm_compute::cpu::CpuWinogradConv2d. + * Any new features should be added to arm_compute::cpu::CpuWinogradConv2d and + * arm_compute::experimental::op::CpuWinogradConv2d should remain a shallow wrapper.
+*/ +class CpuWinogradConv2d : public IOperator +{ +public: + /** Constructors */ + CpuWinogradConv2d(); + /** Prevent instances of this class from being copied (As this class contains pointers) */ + CpuWinogradConv2d(const CpuWinogradConv2d &) = delete; + /** Prevent instances of this class from being copied (As this class contains pointers) */ + CpuWinogradConv2d &operator=(const CpuWinogradConv2d &) = delete; + /** Default move constructor */ + CpuWinogradConv2d(CpuWinogradConv2d &&) = default; + /** Default move assignment */ + CpuWinogradConv2d &operator=(CpuWinogradConv2d &&) = default; + + /** Destructor */ + ~CpuWinogradConv2d(); + + /** Set the input and output tensors. + * + * Valid data layouts: + * - NHWC + * - NCHW + * + * Valid data type configurations: + * |src0 |src1 |src2 |dst | + * |:--------------|:--------------|:------|:--------------| + * |F16 |F16 |F16 |F16 | + * |F32 |F32 |F32 |F32 | + * + * @param[in] src Source tensor Info. 3 lower dimensions represent a single input [width, height, IFM], + * while every optional dimension from 4 and above represent a batch of inputs. + * Data types supported: F16/F32. + * @param[in] weights Weights tensor Info. Weights are 4D tensor with dimensions [kernel_x, kernel_y, IFM, OFM]. Data type supported: Same as @p src. + * For supported kernel sizes, see @ref arm_compute::NEWinogradConvolutionLayer + * @param[in] biases Biases tensor Info. Shared biases supported. Biases are 1D tensor with dimensions [OFM]. Data type supported: Same as @p weights. + * @param[out] dst Destination tensor Info. 3 lower dimensions represent a single output [width, height, OFM], while the rest represent batch of outputs. + * Data types supported: Same as @p src. + * @param[in] conv_info Contains padding and stride information described in @ref PadStrideInfo. Currently only unit strides are supported. + * @param[in] act_info (Optional) Activation layer information in case of a fused activation.
+ * @param[in] enable_fast_math (Optional) Enable fast math computation. In case this flag were set, the function could dispatch the fastest implementation + * available which may introduce a drop of accuracy as well. Default is false + */ + void configure(const ITensorInfo *src, + const ITensorInfo *weights, + const ITensorInfo *biases, + ITensorInfo *dst, + const PadStrideInfo &conv_info, + const ActivationLayerInfo &act_info = ActivationLayerInfo(), + bool enable_fast_math = false); + /** Static function to check if given info will lead to a valid configuration of @ref CpuWinogradConv2d + * + * Similar to CpuWinogradConv2d::configure() + * + * @return a status + */ + static Status validate(const ITensorInfo *src, + const ITensorInfo *weights, + const ITensorInfo *biases, + const ITensorInfo *dst, + const PadStrideInfo &conv_info, + const ActivationLayerInfo &act_info = ActivationLayerInfo(), + bool enable_fast_math = false); + + void run(ITensorPack &tensors) override; + void prepare(ITensorPack &constants) override; + experimental::MemoryRequirements workspace() const override; + +private: + struct Impl; + std::unique_ptr<Impl> _impl; +}; +} // namespace op +} // namespace experimental +} // namespace arm_compute + +#endif // ACL_ARM_COMPUTE_RUNTIME_EXPERIMENTAL_OPERATORS_CPUWINOGRADCONV2D_H diff --git a/docs/Doxyfile b/docs/Doxyfile index 219cbd6d48..816f029648 100644 --- a/docs/Doxyfile +++ b/docs/Doxyfile @@ -38,20 +38,20 @@ PROJECT_NAME = "Compute Library" # could be handy for archiving the generated documentation or if some version # control system is used. -PROJECT_NUMBER = 24.06 +PROJECT_NUMBER = 24.07 # Using the PROJECT_BRIEF tag one can provide an optional one line description # for a project that appears at the top of each page and should give viewer a # quick idea about the purpose of the project. Keep the description short.
-PROJECT_BRIEF = +PROJECT_BRIEF = # With the PROJECT_LOGO tag one can specify a logo or an icon that is included # in the documentation. The maximum height of the logo should not exceed 55 # pixels and the maximum width should not exceed 200 pixels. Doxygen will copy # the logo to the output directory. -PROJECT_LOGO = +PROJECT_LOGO = # The OUTPUT_DIRECTORY tag is used to specify the (relative or absolute) path # into which the generated documentation will be written. If a relative path is @@ -171,7 +171,7 @@ FULL_PATH_NAMES = YES # specify the list of include paths that are normally passed to the compiler # using the -I flag. -STRIP_FROM_INC_PATH = +STRIP_FROM_INC_PATH = # If the SHORT_NAMES tag is set to YES, doxygen will generate much shorter (but # less readable) file names. This can be useful is your file systems doesn't @@ -238,13 +238,13 @@ TAB_SIZE = 4 # "Side Effects:". You can put \n's in the value part of an alias to insert # newlines. -ALIASES = +ALIASES = # This tag can be used to specify a number of word-keyword mappings (TCL only). # A mapping has the form "name=value". For example adding "class=itcl::class" # will allow you to use the command class in the itcl::class meaning. -TCL_SUBST = +TCL_SUBST = # Set the OPTIMIZE_OUTPUT_FOR_C tag to YES if your project consists of C sources # only. Doxygen will then generate output that is more tailored for C. For @@ -632,7 +632,7 @@ GENERATE_DEPRECATEDLIST= YES # sections, marked by \if ... \endif and \cond # ... \endcond blocks. -ENABLED_SECTIONS = +ENABLED_SECTIONS = # The MAX_INITIALIZER_LINES tag determines the maximum number of lines that the # initial value of a variable or macro / define can have for it to appear in the @@ -674,7 +674,7 @@ SHOW_NAMESPACES = YES # by doxygen. Whatever the program writes to standard output is used as the file # version. For an example see the documentation. 
-FILE_VERSION_FILTER = +FILE_VERSION_FILTER = # The LAYOUT_FILE tag can be used to specify a layout file which will be parsed # by doxygen. The layout file controls the global structure of the generated @@ -697,7 +697,7 @@ LAYOUT_FILE = ./docs/DoxygenLayout.xml # LATEX_BIB_STYLE. To use this feature you need bibtex and perl available in the # search path. See also \cite for info how to create references. -CITE_BIB_FILES = +CITE_BIB_FILES = #--------------------------------------------------------------------------- # Configuration options related to warning and progress messages @@ -756,7 +756,7 @@ WARN_FORMAT = "$file:$line:[DOXY_WARN] $text" # messages should be written. If left blank the output is written to standard # error (stderr). -WARN_LOGFILE = +WARN_LOGFILE = #--------------------------------------------------------------------------- # Configuration options related to the input files @@ -894,7 +894,7 @@ EXCLUDE_SYMLINKS = NO # Note that the wildcards are matched against the file with absolute path, so to # exclude all test directories for example use the pattern */test/* -EXCLUDE_PATTERNS = +EXCLUDE_PATTERNS = # The EXCLUDE_SYMBOLS tag can be used to specify one or more symbol names # (namespaces, classes, functions, etc.) that should be excluded from the @@ -905,7 +905,7 @@ EXCLUDE_PATTERNS = # Note that the wildcards are matched against the file with absolute path, so to # exclude all test directories use the pattern */test/* -EXCLUDE_SYMBOLS = +EXCLUDE_SYMBOLS = # The EXAMPLE_PATH tag can be used to specify one or more files or directories # that contain example code fragments that are included (see the \include @@ -952,7 +952,7 @@ IMAGE_PATH = ./docs/ # code is scanned, but not when the output code is generated. If lines are added # or removed, the anchors will not be placed correctly. -INPUT_FILTER = +INPUT_FILTER = # The FILTER_PATTERNS tag can be used to specify filters on a per file pattern # basis. 
Doxygen will compare the file name with each pattern and apply the @@ -961,7 +961,7 @@ INPUT_FILTER = # filters are used. If the FILTER_PATTERNS tag is empty or if none of the # patterns match the file name, INPUT_FILTER is applied. -FILTER_PATTERNS = +FILTER_PATTERNS = # If the FILTER_SOURCE_FILES tag is set to YES, the input filter (if set using # INPUT_FILTER) will also be used to filter the input files that are used for @@ -976,14 +976,14 @@ FILTER_SOURCE_FILES = NO # *.ext= (so without naming a filter). # This tag requires that the tag FILTER_SOURCE_FILES is set to YES. -FILTER_SOURCE_PATTERNS = +FILTER_SOURCE_PATTERNS = # If the USE_MDFILE_AS_MAINPAGE tag refers to the name of a markdown file that # is part of the input, its contents will be placed on the main page # (index.html). This can be useful if you have a project on for instance GitHub # and want to reuse the introduction page also for the doxygen output. -USE_MDFILE_AS_MAINPAGE = +USE_MDFILE_AS_MAINPAGE = #--------------------------------------------------------------------------- # Configuration options related to source browsing @@ -1114,7 +1114,7 @@ COLS_IN_ALPHA_INDEX = 5 # while generating the index headers. # This tag requires that the tag ALPHABETICAL_INDEX is set to YES. -IGNORE_PREFIX = +IGNORE_PREFIX = #--------------------------------------------------------------------------- # Configuration options related to the HTML output @@ -1168,7 +1168,7 @@ HTML_HEADER = ./docs/header.html # that doxygen normally uses. # This tag requires that the tag GENERATE_HTML is set to YES. -HTML_FOOTER = +HTML_FOOTER = # The HTML_STYLESHEET tag can be used to specify a user-defined cascading style # sheet that is used by each HTML page. It can be used to fine-tune the look of @@ -1180,7 +1180,7 @@ HTML_FOOTER = # obsolete. # This tag requires that the tag GENERATE_HTML is set to YES. 
-HTML_STYLESHEET = +HTML_STYLESHEET = # The HTML_EXTRA_STYLESHEET tag can be used to specify additional user-defined # cascading style sheets that are included after the standard style sheets @@ -1203,7 +1203,7 @@ HTML_EXTRA_STYLESHEET = ./docs/stylesheet.css # files will be copied as-is; there are no commands or markers available. # This tag requires that the tag GENERATE_HTML is set to YES. -HTML_EXTRA_FILES = +HTML_EXTRA_FILES = # The HTML_COLORSTYLE_HUE tag controls the color of the HTML output. Doxygen # will adjust the colors in the style sheet and background images according to @@ -1331,7 +1331,7 @@ GENERATE_HTMLHELP = NO # written to the html output directory. # This tag requires that the tag GENERATE_HTMLHELP is set to YES. -CHM_FILE = +CHM_FILE = # The HHC_LOCATION tag can be used to specify the location (absolute path # including file name) of the HTML help compiler (hhc.exe). If non-empty, @@ -1339,7 +1339,7 @@ CHM_FILE = # The file has to be specified with full path. # This tag requires that the tag GENERATE_HTMLHELP is set to YES. -HHC_LOCATION = +HHC_LOCATION = # The GENERATE_CHI flag controls if a separate .chi index file is generated # (YES) or that it should be included in the master .chm file (NO). @@ -1352,7 +1352,7 @@ GENERATE_CHI = NO # and project file content. # This tag requires that the tag GENERATE_HTMLHELP is set to YES. -CHM_INDEX_ENCODING = +CHM_INDEX_ENCODING = # The BINARY_TOC flag controls whether a binary table of contents is generated # (YES) or a normal table of contents (NO) in the .chm file. Furthermore it @@ -1383,7 +1383,7 @@ GENERATE_QHP = NO # the HTML output folder. # This tag requires that the tag GENERATE_QHP is set to YES. -QCH_FILE = +QCH_FILE = # The QHP_NAMESPACE tag specifies the namespace to use when generating Qt Help # Project output. For more information please see Qt Help Project / Namespace @@ -1408,7 +1408,7 @@ QHP_VIRTUAL_FOLDER = doc # filters). # This tag requires that the tag GENERATE_QHP is set to YES. 
-QHP_CUST_FILTER_NAME = +QHP_CUST_FILTER_NAME = # The QHP_CUST_FILTER_ATTRS tag specifies the list of the attributes of the # custom filter to add. For more information please see Qt Help Project / Custom @@ -1416,21 +1416,21 @@ QHP_CUST_FILTER_NAME = # filters). # This tag requires that the tag GENERATE_QHP is set to YES. -QHP_CUST_FILTER_ATTRS = +QHP_CUST_FILTER_ATTRS = # The QHP_SECT_FILTER_ATTRS tag specifies the list of the attributes this # project's filter section matches. Qt Help Project / Filter Attributes (see: # http://qt-project.org/doc/qt-4.8/qthelpproject.html#filter-attributes). # This tag requires that the tag GENERATE_QHP is set to YES. -QHP_SECT_FILTER_ATTRS = +QHP_SECT_FILTER_ATTRS = # The QHG_LOCATION tag can be used to specify the location of Qt's # qhelpgenerator. If non-empty doxygen will try to run qhelpgenerator on the # generated .qhp file. # This tag requires that the tag GENERATE_QHP is set to YES. -QHG_LOCATION = +QHG_LOCATION = # If the GENERATE_ECLIPSEHELP tag is set to YES, additional index files will be # generated, together with the HTML files, they form an Eclipse help plugin. To @@ -1563,7 +1563,7 @@ MATHJAX_RELPATH = https://cdn.mathjax.org/mathjax/latest # MATHJAX_EXTENSIONS = TeX/AMSmath TeX/AMSsymbols # This tag requires that the tag USE_MATHJAX is set to YES. -MATHJAX_EXTENSIONS = +MATHJAX_EXTENSIONS = # The MATHJAX_CODEFILE tag can be used to specify a file with javascript pieces # of code that will be used on startup of the MathJax code. See the MathJax site @@ -1571,7 +1571,7 @@ MATHJAX_EXTENSIONS = # example see the documentation. # This tag requires that the tag USE_MATHJAX is set to YES. -MATHJAX_CODEFILE = +MATHJAX_CODEFILE = # When the SEARCHENGINE tag is enabled doxygen will generate a search box for # the HTML output. The underlying search engine uses javascript and DHTML and @@ -1631,7 +1631,7 @@ EXTERNAL_SEARCH = NO # Searching" for details. # This tag requires that the tag SEARCHENGINE is set to YES. 
-SEARCHENGINE_URL = +SEARCHENGINE_URL = # When SERVER_BASED_SEARCH and EXTERNAL_SEARCH are both enabled the unindexed # search data is written to a file for indexing by an external tool. With the @@ -1647,7 +1647,7 @@ SEARCHDATA_FILE = searchdata.xml # projects and redirect the results back to the right project. # This tag requires that the tag SEARCHENGINE is set to YES. -EXTERNAL_SEARCH_ID = +EXTERNAL_SEARCH_ID = # The EXTRA_SEARCH_MAPPINGS tag can be used to enable searching through doxygen # projects other than the one defined by this configuration file, but that are @@ -1657,7 +1657,7 @@ EXTERNAL_SEARCH_ID = # EXTRA_SEARCH_MAPPINGS = tagname1=loc1 tagname2=loc2 ... # This tag requires that the tag SEARCHENGINE is set to YES. -EXTRA_SEARCH_MAPPINGS = +EXTRA_SEARCH_MAPPINGS = #--------------------------------------------------------------------------- # Configuration options related to the LaTeX output @@ -1718,7 +1718,7 @@ PAPER_TYPE = a4 # If left blank no extra packages will be included. # This tag requires that the tag GENERATE_LATEX is set to YES. -EXTRA_PACKAGES = +EXTRA_PACKAGES = # The LATEX_HEADER tag can be used to specify a personal LaTeX header for the # generated LaTeX document. The header should contain everything until the first @@ -1734,7 +1734,7 @@ EXTRA_PACKAGES = # to HTML_HEADER. # This tag requires that the tag GENERATE_LATEX is set to YES. -LATEX_HEADER = +LATEX_HEADER = # The LATEX_FOOTER tag can be used to specify a personal LaTeX footer for the # generated LaTeX document. The footer should contain everything after the last @@ -1745,7 +1745,7 @@ LATEX_HEADER = # Note: Only use a user-defined footer if you know what you are doing! # This tag requires that the tag GENERATE_LATEX is set to YES. -LATEX_FOOTER = +LATEX_FOOTER = # The LATEX_EXTRA_STYLESHEET tag can be used to specify additional user-defined # LaTeX style sheets that are included after the standard style sheets created @@ -1756,7 +1756,7 @@ LATEX_FOOTER = # list). 
# This tag requires that the tag GENERATE_LATEX is set to YES. -#LATEX_EXTRA_STYLESHEET = +#LATEX_EXTRA_STYLESHEET = # The LATEX_EXTRA_FILES tag can be used to specify one or more extra images or # other source files which should be copied to the LATEX_OUTPUT output @@ -1764,7 +1764,7 @@ LATEX_FOOTER = # markers available. # This tag requires that the tag GENERATE_LATEX is set to YES. -LATEX_EXTRA_FILES = +LATEX_EXTRA_FILES = # If the PDF_HYPERLINKS tag is set to YES, the LaTeX that is generated is # prepared for conversion to PDF (using ps2pdf or pdflatex). The PDF file will @@ -1864,14 +1864,14 @@ RTF_HYPERLINKS = NO # default style sheet that doxygen normally uses. # This tag requires that the tag GENERATE_RTF is set to YES. -RTF_STYLESHEET_FILE = +RTF_STYLESHEET_FILE = # Set optional variables used in the generation of an RTF document. Syntax is # similar to doxygen's config file. A template extensions file can be generated # using doxygen -e rtf extensionFile. # This tag requires that the tag GENERATE_RTF is set to YES. -RTF_EXTENSIONS_FILE = +RTF_EXTENSIONS_FILE = # If the RTF_SOURCE_CODE tag is set to YES then doxygen will include source code # with syntax highlighting in the RTF output. @@ -1916,7 +1916,7 @@ MAN_EXTENSION = .3 # MAN_EXTENSION with the initial . removed. # This tag requires that the tag GENERATE_MAN is set to YES. -#MAN_SUBDIR = +#MAN_SUBDIR = # If the MAN_LINKS tag is set to YES and doxygen generates man output, then it # will generate one additional manual file for each entity documented in the real @@ -2029,7 +2029,7 @@ PERLMOD_PRETTY = YES # overwrite each other's variables. # This tag requires that the tag GENERATE_PERLMOD is set to YES. -PERLMOD_MAKEVAR_PREFIX = +PERLMOD_MAKEVAR_PREFIX = #--------------------------------------------------------------------------- # Configuration options related to the preprocessor @@ -2078,7 +2078,7 @@ INCLUDE_PATH = ./src/core/CL/cl_kernels/ # used. 
# This tag requires that the tag ENABLE_PREPROCESSING is set to YES. -INCLUDE_FILE_PATTERNS = +INCLUDE_FILE_PATTERNS = # The PREDEFINED tag can be used to specify one or more macro names that are # defined before the preprocessor is started (similar to the -D option of e.g. @@ -2108,7 +2108,7 @@ PREDEFINED = DOXYGEN_SKIP_THIS \ # definition found in the source code. # This tag requires that the tag ENABLE_PREPROCESSING is set to YES. -EXPAND_AS_DEFINED = +EXPAND_AS_DEFINED = # If the SKIP_FUNCTION_MACROS tag is set to YES then doxygen's preprocessor will # remove all references to function-like macros that are alone on a line, have @@ -2137,13 +2137,13 @@ SKIP_FUNCTION_MACROS = YES # the path). If a tag file is not located in the directory in which doxygen is # run, you must also specify the path to the tagfile here. -TAGFILES = +TAGFILES = # When a file name is specified after GENERATE_TAGFILE, doxygen will create a # tag file that is based on the input files it reads. See section "Linking to # external documentation" for more information about the usage of tag files. -GENERATE_TAGFILE = +GENERATE_TAGFILE = # If the ALLEXTERNALS tag is set to YES, all external class will be listed in # the class index. If set to NO, only the inherited external classes will be @@ -2192,14 +2192,14 @@ CLASS_DIAGRAMS = YES # the mscgen tool resides. If left empty the tool is assumed to be found in the # default search path. -MSCGEN_PATH = +MSCGEN_PATH = # You can include diagrams made with dia in doxygen documentation. Doxygen will # then run dia to produce the diagram and insert it in the documentation. The # DIA_PATH tag allows you to specify the directory where the dia binary resides. # If left empty dia is assumed to be found in the default search path. -#DIA_PATH = +#DIA_PATH = # If set to YES the inheritance and collaboration graphs will hide inheritance # and usage relations if the target is undocumented or is not a class. 
@@ -2248,7 +2248,7 @@ DOT_FONTSIZE = 10 # the path where dot can find it using this tag. # This tag requires that the tag HAVE_DOT is set to YES. -DOT_FONTPATH = +DOT_FONTPATH = # If the CLASS_GRAPH tag is set to YES then doxygen will generate a graph for # each documented class showing the direct and indirect inheritance relations. @@ -2388,26 +2388,26 @@ INTERACTIVE_SVG = NO # found. If left blank, it is assumed the dot tool can be found in the path. # This tag requires that the tag HAVE_DOT is set to YES. -DOT_PATH = +DOT_PATH = # The DOTFILE_DIRS tag can be used to specify one or more directories that # contain dot files that are included in the documentation (see the \dotfile # command). # This tag requires that the tag HAVE_DOT is set to YES. -DOTFILE_DIRS = +DOTFILE_DIRS = # The MSCFILE_DIRS tag can be used to specify one or more directories that # contain msc files that are included in the documentation (see the \mscfile # command). -MSCFILE_DIRS = +MSCFILE_DIRS = # The DIAFILE_DIRS tag can be used to specify one or more directories that # contain dia files that are included in the documentation (see the \diafile # command). -#DIAFILE_DIRS = +#DIAFILE_DIRS = # When using plantuml, the PLANTUML_JAR_PATH tag should be used to specify the # path where java can find the plantuml.jar file. If left blank, it is assumed @@ -2415,12 +2415,12 @@ MSCFILE_DIRS = # generate a warning when it encounters a \startuml command in this case and # will not generate output for the diagram. -#PLANTUML_JAR_PATH = +#PLANTUML_JAR_PATH = # When using plantuml, the specified paths are searched for files specified by # the !include statement in a plantuml block. -#PLANTUML_INCLUDE_PATH = +#PLANTUML_INCLUDE_PATH = # The DOT_GRAPH_MAX_NODES tag can be used to set the maximum number of nodes # that will be shown in the graph. 
If the number of nodes in a graph becomes diff --git a/docs/user_guide/errata.dox b/docs/user_guide/errata.dox index c195dc7851..d22659f484 100644 --- a/docs/user_guide/errata.dox +++ b/docs/user_guide/errata.dox @@ -30,6 +30,30 @@ namespace arm_compute @section S7_1_errata Errata +- (COMPMID-7191) Fix Memory violation in non-optimized Neon™ FP32 GeMM + - Versions: >= v17.09 && < v24.07 + - Oses: Linux, Android, MacOS, Windows. + - Conditions: + - Compile the latest Arm Compute Library for armv8a + - RHS with batches + +- (COMPMID-7011) NELSTMLayer and CLLSTMLayer crash when given a memory manager. + - Versions: >= v18.11 && < v24.07 + - Oses: Linux, Android, MacOS, Windows. + - Conditions: + - Compile the latest Arm Compute Library for armv8a + - Use NELSTMLayer or CLLSTMLayer with a memory manager + +- (COMPMID-7109) Under certain conditions, Quantized GEMM may result in very few mismatches due to 16-bit accumulation overflow + - Versions: >= v17.09 + - Oses: Linux, Android, MacOS, Windows. + - Conditions: + - Compile the latest Arm Compute Library for armv8a + - Device without dot product support + - In the matrix multiplication + - Lhs matrix must have -128 values eight positions apart from each other in its row + - Rhs matrix must have -128 values at the same positions as Lhs + - (COMPMID-6904) Fix out-of-bound memory write for non-optimized FP16 GeMM kernel. - Versions: >= v17.09 && < v24.06 - Oses: Linux, Android, MacOS, Windows. diff --git a/docs/user_guide/library.dox b/docs/user_guide/library.dox index 5a337c374b..65ef9b0c4f 100644 --- a/docs/user_guide/library.dox +++ b/docs/user_guide/library.dox @@ -58,7 +58,7 @@ Required toolchain: android-ndk-r23-beta5 or later. To build for BF16: "neon" flag should be set "=1" and "arch" has to be "=armv8.6-a", "=armv8.6-a-sve", or "=armv8.6-a-sve2".
For example: - scons arch=armv8.6-a-sve neon=1 opencl=0 extra_cxx_flags="-fPIC" benchmark_tests=0 validation_tests=0 validation_examples=1 os=android Werror=0 toolchain_prefix=aarch64-linux-android29 + scons arch=armv8.6-a-sve neon=1 opencl=0 extra_cxx_flags="-fPIC" benchmark_tests=0 validation_tests=0 examples=1 os=android Werror=0 toolchain_prefix=aarch64-linux-android29 To enable BF16 acceleration when running FP32 "fast-math" has to be enabled and that works only for Neon convolution layer using cpu gemm. In this scenario on CPU: the CpuGemmConv2d kernel performs the conversion from FP32, type of input tensor, to BF16 at block level to exploit the arithmetic capabilities dedicated to BF16. Then transforms back to FP32, the output tensor type. diff --git a/docs/user_guide/release_version_and_change_log.dox b/docs/user_guide/release_version_and_change_log.dox index 16664c8d84..4a9da5b466 100644 --- a/docs/user_guide/release_version_and_change_log.dox +++ b/docs/user_guide/release_version_and_change_log.dox @@ -41,6 +41,16 @@ If there is more than one release in a month then an extra sequential number is @section S2_2_changelog Changelog +v24.07 Public major release + - Fix overflow issue in NEMeanStdDevNormalizationLayer for Fp16 + - Expose CpuActivation functionality using the experimental operators api + - Expose CpuGemm functionality using the experimental operators api + - Expose CpuGemmConv2d functionality using the experimental operators api + - Expose CpuGemmDirectConv2d functionality using the experimental operators api + - Expose CpuTranspose functionality using the experimental operators api + - Expose CpuWinogradConv2d functionality using the experimental operators api + - Optimize CPU operator memory management. 
+ v24.06 Public minor release - Enable FP16 in multiple Neon™ kernels for multi_isa + v8a - Fix OpenMP® thread scheduling for large machine diff --git a/filelist.json b/filelist.json index e833de9fc7..65804164d8 100644 --- a/filelist.json +++ b/filelist.json @@ -1437,7 +1437,13 @@ "src/cpu/operators/CpuDirectConv3d.cpp", "src/cpu/kernels/CpuDirectConv3dKernel.cpp", "src/runtime/NEON/functions/NEConv3D.cpp" - ] + ], + "neon":{ + "fp32":["src/cpu/kernels/conv3d/generic/neon/fp32.cpp"], + "fp16":["src/cpu/kernels/conv3d/generic/neon/fp16.cpp"], + "qasymm8":["src/cpu/kernels/conv3d/generic/neon/qasymm8.cpp"], + "qasymm8_signed":["src/cpu/kernels/conv3d/generic/neon/qasymm8_signed.cpp"] + } } }, "ElementwiseBinary": { @@ -1592,7 +1598,13 @@ "src/cpu/operators/CpuGemmLowpMatrixMultiplyCore.cpp", "src/runtime/NEON/functions/NEGEMM.cpp", "src/runtime/NEON/functions/NEGEMMLowpMatrixMultiplyCore.cpp", - "src/runtime/NEON/functions/NEGEMMLowpOutputStage.cpp" + "src/runtime/NEON/functions/NEGEMMLowpOutputStage.cpp", + "src/runtime/experimental/operators/CpuActivation.cpp", + "src/runtime/experimental/operators/CpuGemm.cpp", + "src/runtime/experimental/operators/CpuGemmConv2d.cpp", + "src/runtime/experimental/operators/CpuGemmDirectConv2d.cpp", + "src/runtime/experimental/operators/CpuTranspose.cpp", + "src/runtime/experimental/operators/CpuWinogradConv2d.cpp" ], "neon": { "common": [ diff --git a/src/BUILD.bazel b/src/BUILD.bazel index f270824ab4..b63b19a8ca 100644 --- a/src/BUILD.bazel +++ b/src/BUILD.bazel @@ -743,6 +743,10 @@ filegroup( "cpu/kernels/boundingboxtransform/generic/neon/impl.cpp", "cpu/kernels/boundingboxtransform/generic/neon/qsymm16.cpp", "cpu/kernels/cast/generic/neon/fp16.cpp", + "cpu/kernels/conv3d/generic/neon/fp16.cpp", + "cpu/kernels/conv3d/generic/neon/fp32.cpp", + "cpu/kernels/conv3d/generic/neon/qasymm8.cpp", + "cpu/kernels/conv3d/generic/neon/qasymm8_signed.cpp", "cpu/kernels/crop/generic/neon/fp16.cpp", "cpu/kernels/crop/generic/neon/fp32.cpp", 
"cpu/kernels/crop/generic/neon/integer.cpp", @@ -1008,7 +1012,13 @@ filegroup( "runtime/SubTensor.cpp", "runtime/Tensor.cpp", "runtime/TensorAllocator.cpp", - "runtime/Utils.cpp"] + + "runtime/Utils.cpp", + "runtime/experimental/operators/CpuActivation.cpp", + "runtime/experimental/operators/CpuGemm.cpp", + "runtime/experimental/operators/CpuGemmConv2d.cpp", + "runtime/experimental/operators/CpuGemmDirectConv2d.cpp", + "runtime/experimental/operators/CpuTranspose.cpp", + "runtime/experimental/operators/CpuWinogradConv2d.cpp"] + glob(["**/*.h", "**/*.hpp", "**/*.inl"]), diff --git a/src/CMakeLists.txt b/src/CMakeLists.txt index 87c5f8b21d..c8654810cf 100644 --- a/src/CMakeLists.txt +++ b/src/CMakeLists.txt @@ -734,6 +734,10 @@ target_sources( cpu/kernels/boundingboxtransform/generic/neon/impl.cpp cpu/kernels/boundingboxtransform/generic/neon/qsymm16.cpp cpu/kernels/cast/generic/neon/fp16.cpp + cpu/kernels/conv3d/generic/neon/fp16.cpp + cpu/kernels/conv3d/generic/neon/fp32.cpp + cpu/kernels/conv3d/generic/neon/qasymm8.cpp + cpu/kernels/conv3d/generic/neon/qasymm8_signed.cpp cpu/kernels/crop/generic/neon/fp16.cpp cpu/kernels/crop/generic/neon/fp32.cpp cpu/kernels/crop/generic/neon/integer.cpp @@ -1000,4 +1004,10 @@ target_sources( runtime/Tensor.cpp runtime/TensorAllocator.cpp runtime/Utils.cpp + runtime/experimental/operators/CpuActivation.cpp + runtime/experimental/operators/CpuGemm.cpp + runtime/experimental/operators/CpuGemmConv2d.cpp + runtime/experimental/operators/CpuGemmDirectConv2d.cpp + runtime/experimental/operators/CpuTranspose.cpp + runtime/experimental/operators/CpuWinogradConv2d.cpp ) \ No newline at end of file diff --git a/src/common/cpuinfo/CpuIsaInfo.h b/src/common/cpuinfo/CpuIsaInfo.h index 9d6bc07b67..b1f5d220a4 100644 --- a/src/common/cpuinfo/CpuIsaInfo.h +++ b/src/common/cpuinfo/CpuIsaInfo.h @@ -1,5 +1,5 @@ /* - * Copyright (c) 2021-2022 Arm Limited. + * Copyright (c) 2021-2022, 2024 Arm Limited. 
* * SPDX-License-Identifier: MIT * @@ -21,8 +21,8 @@ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE * SOFTWARE. */ -#ifndef SRC_COMMON_CPUINFO_CPUISAINFO_H -#define SRC_COMMON_CPUINFO_CPUISAINFO_H +#ifndef ACL_SRC_COMMON_CPUINFO_CPUISAINFO_H +#define ACL_SRC_COMMON_CPUINFO_CPUISAINFO_H #include @@ -81,4 +81,4 @@ init_cpu_isa_from_regs(uint64_t isar0, uint64_t isar1, uint64_t pfr0, uint64_t p } // namespace cpuinfo } // namespace arm_compute -#endif /* SRC_COMMON_CPUINFO_CPUISAINFO_H */ +#endif // ACL_SRC_COMMON_CPUINFO_CPUISAINFO_H diff --git a/src/core/NEON/kernels/NEROIAlignLayerKernel.cpp b/src/core/NEON/kernels/NEROIAlignLayerKernel.cpp index 486cd6d331..29e9ccb656 100644 --- a/src/core/NEON/kernels/NEROIAlignLayerKernel.cpp +++ b/src/core/NEON/kernels/NEROIAlignLayerKernel.cpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2019-2022 Arm Limited. + * Copyright (c) 2019-2022, 2024 Arm Limited. * * SPDX-License-Identifier: MIT * @@ -67,10 +67,8 @@ struct ROIAlignKernel static const ROIAlignKernel available_kernels[] = { {"fp32_neon_roialign", [](const ROIAlignSelectorData &data) { return data.dt == DataType::F32; }, REGISTER_FP32_NEON(arm_compute::cpu::neon_fp32_roialign)}, -#ifdef __ARM_FEATURE_FP16_VECTOR_ARITHMETIC {"fp16_neon_roialign", [](const ROIAlignSelectorData &data) { return data.dt == DataType::F16; }, REGISTER_FP16_NEON(arm_compute::cpu::neon_fp16_roialign)}, -#endif // __ARM_FEATURE_FP16_VECTOR_ARITHMETIC #if defined(ARM_COMPUTE_ENABLE_NEON) {"qu8_neon_roialign", [](const ROIAlignSelectorData &data) { return data.dt == DataType::QASYMM8; }, REGISTER_QASYMM8_NEON(arm_compute::cpu::neon_qu8_roialign)}, diff --git a/src/core/NEON/kernels/arm_conv/depthwise/depthwise_implementation_constraints.hpp b/src/core/NEON/kernels/arm_conv/depthwise/depthwise_implementation_constraints.hpp index 15064aeedc..aaf03b7320 100644 --- a/src/core/NEON/kernels/arm_conv/depthwise/depthwise_implementation_constraints.hpp +++ 
b/src/core/NEON/kernels/arm_conv/depthwise/depthwise_implementation_constraints.hpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2021-2023 Arm Limited. + * Copyright (c) 2021-2024 Arm Limited. * * SPDX-License-Identifier: MIT * diff --git a/src/core/NEON/kernels/arm_conv/depthwise/depthwise_planar.hpp b/src/core/NEON/kernels/arm_conv/depthwise/depthwise_planar.hpp index c3daaf04fe..64832a38a9 100644 --- a/src/core/NEON/kernels/arm_conv/depthwise/depthwise_planar.hpp +++ b/src/core/NEON/kernels/arm_conv/depthwise/depthwise_planar.hpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2022-2023 Arm Limited. + * Copyright (c) 2022-2024 Arm Limited. * * SPDX-License-Identifier: MIT * diff --git a/src/core/NEON/kernels/arm_conv/depthwise/interleaves/a64_s8q_3x3_dot.cpp b/src/core/NEON/kernels/arm_conv/depthwise/interleaves/a64_s8q_3x3_dot.cpp index 3de4bdc1fb..8994c915f3 100644 --- a/src/core/NEON/kernels/arm_conv/depthwise/interleaves/a64_s8q_3x3_dot.cpp +++ b/src/core/NEON/kernels/arm_conv/depthwise/interleaves/a64_s8q_3x3_dot.cpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2021, 2023 Arm Limited. + * Copyright (c) 2021, 2023-2024 Arm Limited. * * SPDX-License-Identifier: MIT * diff --git a/src/core/NEON/kernels/arm_conv/depthwise/interleaves/a64_u8q_3x3_dot.cpp b/src/core/NEON/kernels/arm_conv/depthwise/interleaves/a64_u8q_3x3_dot.cpp index 19264c9fce..e4b8bf8ba4 100644 --- a/src/core/NEON/kernels/arm_conv/depthwise/interleaves/a64_u8q_3x3_dot.cpp +++ b/src/core/NEON/kernels/arm_conv/depthwise/interleaves/a64_u8q_3x3_dot.cpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2021, 2023 Arm Limited. + * Copyright (c) 2021, 2023-2024 Arm Limited. 
* * SPDX-License-Identifier: MIT * diff --git a/src/core/NEON/kernels/arm_conv/depthwise/interleaves/sve_s8q_3x3_dot.cpp b/src/core/NEON/kernels/arm_conv/depthwise/interleaves/sve_s8q_3x3_dot.cpp index 5d7b54f235..4c1cca73c7 100644 --- a/src/core/NEON/kernels/arm_conv/depthwise/interleaves/sve_s8q_3x3_dot.cpp +++ b/src/core/NEON/kernels/arm_conv/depthwise/interleaves/sve_s8q_3x3_dot.cpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2021, 2023 Arm Limited. + * Copyright (c) 2021, 2023-2024 Arm Limited. * * SPDX-License-Identifier: MIT * diff --git a/src/core/NEON/kernels/arm_conv/depthwise/interleaves/sve_u8q_3x3_dot.cpp b/src/core/NEON/kernels/arm_conv/depthwise/interleaves/sve_u8q_3x3_dot.cpp index c3da81448b..1d5ab292d2 100644 --- a/src/core/NEON/kernels/arm_conv/depthwise/interleaves/sve_u8q_3x3_dot.cpp +++ b/src/core/NEON/kernels/arm_conv/depthwise/interleaves/sve_u8q_3x3_dot.cpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2021, 2023 Arm Limited. + * Copyright (c) 2021, 2023-2024 Arm Limited. * * SPDX-License-Identifier: MIT * diff --git a/src/core/NEON/kernels/arm_conv/depthwise/kernels/a64_fp16_nhwc_3x3_s1_output2x2_mla_depthfirst/generic_direct.cpp b/src/core/NEON/kernels/arm_conv/depthwise/kernels/a64_fp16_nhwc_3x3_s1_output2x2_mla_depthfirst/generic_direct.cpp index d8ca3d7437..35105fc63b 100644 --- a/src/core/NEON/kernels/arm_conv/depthwise/kernels/a64_fp16_nhwc_3x3_s1_output2x2_mla_depthfirst/generic_direct.cpp +++ b/src/core/NEON/kernels/arm_conv/depthwise/kernels/a64_fp16_nhwc_3x3_s1_output2x2_mla_depthfirst/generic_direct.cpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2021, 2023 Arm Limited. + * Copyright (c) 2021, 2023-2024 Arm Limited. 
* * SPDX-License-Identifier: MIT * diff --git a/src/core/NEON/kernels/arm_conv/depthwise/kernels/a64_fp16_nhwc_3x3_s1_output2x2_mla_depthfirst/generic_indirect.cpp b/src/core/NEON/kernels/arm_conv/depthwise/kernels/a64_fp16_nhwc_3x3_s1_output2x2_mla_depthfirst/generic_indirect.cpp index c9a554e9ad..b6412476c9 100644 --- a/src/core/NEON/kernels/arm_conv/depthwise/kernels/a64_fp16_nhwc_3x3_s1_output2x2_mla_depthfirst/generic_indirect.cpp +++ b/src/core/NEON/kernels/arm_conv/depthwise/kernels/a64_fp16_nhwc_3x3_s1_output2x2_mla_depthfirst/generic_indirect.cpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2021, 2023 Arm Limited. + * Copyright (c) 2021, 2023-2024 Arm Limited. * * SPDX-License-Identifier: MIT * diff --git a/src/core/NEON/kernels/arm_conv/depthwise/kernels/a64_fp16_nhwc_3x3_s1_output3x3_mla_depthfirst/generic_direct.cpp b/src/core/NEON/kernels/arm_conv/depthwise/kernels/a64_fp16_nhwc_3x3_s1_output3x3_mla_depthfirst/generic_direct.cpp index 4e64a2bf2b..9974239de7 100644 --- a/src/core/NEON/kernels/arm_conv/depthwise/kernels/a64_fp16_nhwc_3x3_s1_output3x3_mla_depthfirst/generic_direct.cpp +++ b/src/core/NEON/kernels/arm_conv/depthwise/kernels/a64_fp16_nhwc_3x3_s1_output3x3_mla_depthfirst/generic_direct.cpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2021, 2023 Arm Limited. + * Copyright (c) 2021, 2023-2024 Arm Limited. * * SPDX-License-Identifier: MIT * diff --git a/src/core/NEON/kernels/arm_conv/depthwise/kernels/a64_fp16_nhwc_3x3_s1_output3x3_mla_depthfirst/generic_indirect.cpp b/src/core/NEON/kernels/arm_conv/depthwise/kernels/a64_fp16_nhwc_3x3_s1_output3x3_mla_depthfirst/generic_indirect.cpp index 72e68482c6..dfc71ec16f 100644 --- a/src/core/NEON/kernels/arm_conv/depthwise/kernels/a64_fp16_nhwc_3x3_s1_output3x3_mla_depthfirst/generic_indirect.cpp +++ b/src/core/NEON/kernels/arm_conv/depthwise/kernels/a64_fp16_nhwc_3x3_s1_output3x3_mla_depthfirst/generic_indirect.cpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2021-2023 Arm Limited. + * Copyright (c) 2021-2024 Arm Limited. 
* * SPDX-License-Identifier: MIT * diff --git a/src/core/NEON/kernels/arm_conv/depthwise/kernels/a64_fp16_nhwc_3x3_s1_output4x4_mla_depthfirst/generic_direct.cpp b/src/core/NEON/kernels/arm_conv/depthwise/kernels/a64_fp16_nhwc_3x3_s1_output4x4_mla_depthfirst/generic_direct.cpp index a1e1dd0e99..079d29099f 100644 --- a/src/core/NEON/kernels/arm_conv/depthwise/kernels/a64_fp16_nhwc_3x3_s1_output4x4_mla_depthfirst/generic_direct.cpp +++ b/src/core/NEON/kernels/arm_conv/depthwise/kernels/a64_fp16_nhwc_3x3_s1_output4x4_mla_depthfirst/generic_direct.cpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2021, 2023 Arm Limited. + * Copyright (c) 2021, 2023-2024 Arm Limited. * * SPDX-License-Identifier: MIT * diff --git a/src/core/NEON/kernels/arm_conv/depthwise/kernels/a64_fp16_nhwc_3x3_s1_output4x4_mla_depthfirst/generic_indirect.cpp b/src/core/NEON/kernels/arm_conv/depthwise/kernels/a64_fp16_nhwc_3x3_s1_output4x4_mla_depthfirst/generic_indirect.cpp index 96feeeeece..0d7d868ffd 100644 --- a/src/core/NEON/kernels/arm_conv/depthwise/kernels/a64_fp16_nhwc_3x3_s1_output4x4_mla_depthfirst/generic_indirect.cpp +++ b/src/core/NEON/kernels/arm_conv/depthwise/kernels/a64_fp16_nhwc_3x3_s1_output4x4_mla_depthfirst/generic_indirect.cpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2021, 2023 Arm Limited. + * Copyright (c) 2021, 2023-2024 Arm Limited. * * SPDX-License-Identifier: MIT * diff --git a/src/core/NEON/kernels/arm_conv/depthwise/kernels/a64_fp16_nhwc_3x3_s2_output2x2_mla_depthfirst/generic_direct.cpp b/src/core/NEON/kernels/arm_conv/depthwise/kernels/a64_fp16_nhwc_3x3_s2_output2x2_mla_depthfirst/generic_direct.cpp index 8954999990..9ec181520c 100644 --- a/src/core/NEON/kernels/arm_conv/depthwise/kernels/a64_fp16_nhwc_3x3_s2_output2x2_mla_depthfirst/generic_direct.cpp +++ b/src/core/NEON/kernels/arm_conv/depthwise/kernels/a64_fp16_nhwc_3x3_s2_output2x2_mla_depthfirst/generic_direct.cpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2021, 2023 Arm Limited. + * Copyright (c) 2021, 2023-2024 Arm Limited. 
* * SPDX-License-Identifier: MIT * diff --git a/src/core/NEON/kernels/arm_conv/depthwise/kernels/a64_fp16_nhwc_3x3_s2_output2x2_mla_depthfirst/generic_indirect.cpp b/src/core/NEON/kernels/arm_conv/depthwise/kernels/a64_fp16_nhwc_3x3_s2_output2x2_mla_depthfirst/generic_indirect.cpp index 6ae0b30afd..03efe13cc1 100644 --- a/src/core/NEON/kernels/arm_conv/depthwise/kernels/a64_fp16_nhwc_3x3_s2_output2x2_mla_depthfirst/generic_indirect.cpp +++ b/src/core/NEON/kernels/arm_conv/depthwise/kernels/a64_fp16_nhwc_3x3_s2_output2x2_mla_depthfirst/generic_indirect.cpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2021, 2023 Arm Limited. + * Copyright (c) 2021, 2023-2024 Arm Limited. * * SPDX-License-Identifier: MIT * diff --git a/src/core/NEON/kernels/arm_conv/depthwise/kernels/a64_fp16_nhwc_5x5_s1_output2x2_mla_depthfirst/generic_direct.cpp b/src/core/NEON/kernels/arm_conv/depthwise/kernels/a64_fp16_nhwc_5x5_s1_output2x2_mla_depthfirst/generic_direct.cpp index cecaf79704..df2560b82b 100644 --- a/src/core/NEON/kernels/arm_conv/depthwise/kernels/a64_fp16_nhwc_5x5_s1_output2x2_mla_depthfirst/generic_direct.cpp +++ b/src/core/NEON/kernels/arm_conv/depthwise/kernels/a64_fp16_nhwc_5x5_s1_output2x2_mla_depthfirst/generic_direct.cpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2021, 2023 Arm Limited. + * Copyright (c) 2021, 2023-2024 Arm Limited. * * SPDX-License-Identifier: MIT * diff --git a/src/core/NEON/kernels/arm_conv/depthwise/kernels/a64_fp16_nhwc_5x5_s1_output2x2_mla_depthfirst/generic_indirect.cpp b/src/core/NEON/kernels/arm_conv/depthwise/kernels/a64_fp16_nhwc_5x5_s1_output2x2_mla_depthfirst/generic_indirect.cpp index 4913340c4c..4af3411d6c 100644 --- a/src/core/NEON/kernels/arm_conv/depthwise/kernels/a64_fp16_nhwc_5x5_s1_output2x2_mla_depthfirst/generic_indirect.cpp +++ b/src/core/NEON/kernels/arm_conv/depthwise/kernels/a64_fp16_nhwc_5x5_s1_output2x2_mla_depthfirst/generic_indirect.cpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2021, 2023 Arm Limited. 
+ * Copyright (c) 2021, 2023-2024 Arm Limited. * * SPDX-License-Identifier: MIT * diff --git a/src/core/NEON/kernels/arm_conv/depthwise/kernels/a64_fp16_nhwc_generic_output9_mla_depthfirst/generic.cpp b/src/core/NEON/kernels/arm_conv/depthwise/kernels/a64_fp16_nhwc_generic_output9_mla_depthfirst/generic.cpp index 08f40b785f..7b9843f92a 100644 --- a/src/core/NEON/kernels/arm_conv/depthwise/kernels/a64_fp16_nhwc_generic_output9_mla_depthfirst/generic.cpp +++ b/src/core/NEON/kernels/arm_conv/depthwise/kernels/a64_fp16_nhwc_generic_output9_mla_depthfirst/generic.cpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2021-2023 Arm Limited. + * Copyright (c) 2021-2024 Arm Limited. * * SPDX-License-Identifier: MIT * diff --git a/src/core/NEON/kernels/arm_conv/depthwise/kernels/a64_fp16_packed_to_nhwc_generic_with_multiplier_output2x8_mla_depthfirst/generic.cpp b/src/core/NEON/kernels/arm_conv/depthwise/kernels/a64_fp16_packed_to_nhwc_generic_with_multiplier_output2x8_mla_depthfirst/generic.cpp index cee3fb59c5..d9652821d3 100644 --- a/src/core/NEON/kernels/arm_conv/depthwise/kernels/a64_fp16_packed_to_nhwc_generic_with_multiplier_output2x8_mla_depthfirst/generic.cpp +++ b/src/core/NEON/kernels/arm_conv/depthwise/kernels/a64_fp16_packed_to_nhwc_generic_with_multiplier_output2x8_mla_depthfirst/generic.cpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2021-2023 Arm Limited. + * Copyright (c) 2021-2024 Arm Limited. 
* * SPDX-License-Identifier: MIT * diff --git a/src/core/NEON/kernels/arm_conv/depthwise/kernels/a64_fp32_nhwc_3x3_s1_output2x2_mla_depthfirst/generic_direct.cpp b/src/core/NEON/kernels/arm_conv/depthwise/kernels/a64_fp32_nhwc_3x3_s1_output2x2_mla_depthfirst/generic_direct.cpp index fd8686c15e..ec90f77a5f 100644 --- a/src/core/NEON/kernels/arm_conv/depthwise/kernels/a64_fp32_nhwc_3x3_s1_output2x2_mla_depthfirst/generic_direct.cpp +++ b/src/core/NEON/kernels/arm_conv/depthwise/kernels/a64_fp32_nhwc_3x3_s1_output2x2_mla_depthfirst/generic_direct.cpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2021, 2023 Arm Limited. + * Copyright (c) 2021, 2023-2024 Arm Limited. * * SPDX-License-Identifier: MIT * diff --git a/src/core/NEON/kernels/arm_conv/depthwise/kernels/a64_fp32_nhwc_3x3_s1_output2x2_mla_depthfirst/generic_indirect.cpp b/src/core/NEON/kernels/arm_conv/depthwise/kernels/a64_fp32_nhwc_3x3_s1_output2x2_mla_depthfirst/generic_indirect.cpp index 7dedfd972a..0743d3ed90 100644 --- a/src/core/NEON/kernels/arm_conv/depthwise/kernels/a64_fp32_nhwc_3x3_s1_output2x2_mla_depthfirst/generic_indirect.cpp +++ b/src/core/NEON/kernels/arm_conv/depthwise/kernels/a64_fp32_nhwc_3x3_s1_output2x2_mla_depthfirst/generic_indirect.cpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2021, 2023 Arm Limited. + * Copyright (c) 2021, 2023-2024 Arm Limited. * * SPDX-License-Identifier: MIT * diff --git a/src/core/NEON/kernels/arm_conv/depthwise/kernels/a64_fp32_nhwc_3x3_s1_output3x3_mla_depthfirst/generic_direct.cpp b/src/core/NEON/kernels/arm_conv/depthwise/kernels/a64_fp32_nhwc_3x3_s1_output3x3_mla_depthfirst/generic_direct.cpp index 9bfcd9cd3c..3dc65efea1 100644 --- a/src/core/NEON/kernels/arm_conv/depthwise/kernels/a64_fp32_nhwc_3x3_s1_output3x3_mla_depthfirst/generic_direct.cpp +++ b/src/core/NEON/kernels/arm_conv/depthwise/kernels/a64_fp32_nhwc_3x3_s1_output3x3_mla_depthfirst/generic_direct.cpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2021, 2023 Arm Limited. + * Copyright (c) 2021, 2023-2024 Arm Limited. 
* * SPDX-License-Identifier: MIT * diff --git a/src/core/NEON/kernels/arm_conv/depthwise/kernels/a64_fp32_nhwc_3x3_s1_output3x3_mla_depthfirst/generic_indirect.cpp b/src/core/NEON/kernels/arm_conv/depthwise/kernels/a64_fp32_nhwc_3x3_s1_output3x3_mla_depthfirst/generic_indirect.cpp index 972f7eb535..21cb17f713 100644 --- a/src/core/NEON/kernels/arm_conv/depthwise/kernels/a64_fp32_nhwc_3x3_s1_output3x3_mla_depthfirst/generic_indirect.cpp +++ b/src/core/NEON/kernels/arm_conv/depthwise/kernels/a64_fp32_nhwc_3x3_s1_output3x3_mla_depthfirst/generic_indirect.cpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2021, 2023 Arm Limited. + * Copyright (c) 2021, 2023-2024 Arm Limited. * * SPDX-License-Identifier: MIT * diff --git a/src/core/NEON/kernels/arm_conv/depthwise/kernels/a64_fp32_nhwc_3x3_s1_output4x4_mla_depthfirst/generic_direct.cpp b/src/core/NEON/kernels/arm_conv/depthwise/kernels/a64_fp32_nhwc_3x3_s1_output4x4_mla_depthfirst/generic_direct.cpp index 3adf8b0d9f..183d2a4e94 100644 --- a/src/core/NEON/kernels/arm_conv/depthwise/kernels/a64_fp32_nhwc_3x3_s1_output4x4_mla_depthfirst/generic_direct.cpp +++ b/src/core/NEON/kernels/arm_conv/depthwise/kernels/a64_fp32_nhwc_3x3_s1_output4x4_mla_depthfirst/generic_direct.cpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2021, 2023 Arm Limited. + * Copyright (c) 2021, 2023-2024 Arm Limited. * * SPDX-License-Identifier: MIT * diff --git a/src/core/NEON/kernels/arm_conv/depthwise/kernels/a64_fp32_nhwc_3x3_s1_output4x4_mla_depthfirst/generic_indirect.cpp b/src/core/NEON/kernels/arm_conv/depthwise/kernels/a64_fp32_nhwc_3x3_s1_output4x4_mla_depthfirst/generic_indirect.cpp index 76045f30d6..ede67ee51f 100644 --- a/src/core/NEON/kernels/arm_conv/depthwise/kernels/a64_fp32_nhwc_3x3_s1_output4x4_mla_depthfirst/generic_indirect.cpp +++ b/src/core/NEON/kernels/arm_conv/depthwise/kernels/a64_fp32_nhwc_3x3_s1_output4x4_mla_depthfirst/generic_indirect.cpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2021, 2023 Arm Limited. 
+ * Copyright (c) 2021, 2023-2024 Arm Limited. * * SPDX-License-Identifier: MIT * diff --git a/src/core/NEON/kernels/arm_conv/depthwise/kernels/a64_fp32_nhwc_3x3_s2_output2x2_mla_depthfirst/generic_direct.cpp b/src/core/NEON/kernels/arm_conv/depthwise/kernels/a64_fp32_nhwc_3x3_s2_output2x2_mla_depthfirst/generic_direct.cpp index 5ab61fad4c..387a02ae5c 100644 --- a/src/core/NEON/kernels/arm_conv/depthwise/kernels/a64_fp32_nhwc_3x3_s2_output2x2_mla_depthfirst/generic_direct.cpp +++ b/src/core/NEON/kernels/arm_conv/depthwise/kernels/a64_fp32_nhwc_3x3_s2_output2x2_mla_depthfirst/generic_direct.cpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2021, 2023 Arm Limited. + * Copyright (c) 2021, 2023-2024 Arm Limited. * * SPDX-License-Identifier: MIT * diff --git a/src/core/NEON/kernels/arm_conv/depthwise/kernels/a64_fp32_nhwc_3x3_s2_output2x2_mla_depthfirst/generic_indirect.cpp b/src/core/NEON/kernels/arm_conv/depthwise/kernels/a64_fp32_nhwc_3x3_s2_output2x2_mla_depthfirst/generic_indirect.cpp index 24fe255dfb..c256a652ad 100644 --- a/src/core/NEON/kernels/arm_conv/depthwise/kernels/a64_fp32_nhwc_3x3_s2_output2x2_mla_depthfirst/generic_indirect.cpp +++ b/src/core/NEON/kernels/arm_conv/depthwise/kernels/a64_fp32_nhwc_3x3_s2_output2x2_mla_depthfirst/generic_indirect.cpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2021, 2023 Arm Limited. + * Copyright (c) 2021, 2023-2024 Arm Limited. * * SPDX-License-Identifier: MIT * diff --git a/src/core/NEON/kernels/arm_conv/depthwise/kernels/a64_fp32_nhwc_5x5_s1_output2x2_mla_depthfirst/generic_direct.cpp b/src/core/NEON/kernels/arm_conv/depthwise/kernels/a64_fp32_nhwc_5x5_s1_output2x2_mla_depthfirst/generic_direct.cpp index 3426fbc3f9..b326b21741 100644 --- a/src/core/NEON/kernels/arm_conv/depthwise/kernels/a64_fp32_nhwc_5x5_s1_output2x2_mla_depthfirst/generic_direct.cpp +++ b/src/core/NEON/kernels/arm_conv/depthwise/kernels/a64_fp32_nhwc_5x5_s1_output2x2_mla_depthfirst/generic_direct.cpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2021, 2023 Arm Limited. 
+ * Copyright (c) 2021, 2023-2024 Arm Limited. * * SPDX-License-Identifier: MIT * diff --git a/src/core/NEON/kernels/arm_conv/depthwise/kernels/a64_fp32_nhwc_5x5_s1_output2x2_mla_depthfirst/generic_indirect.cpp b/src/core/NEON/kernels/arm_conv/depthwise/kernels/a64_fp32_nhwc_5x5_s1_output2x2_mla_depthfirst/generic_indirect.cpp index 32939eb6dc..625a9d7b67 100644 --- a/src/core/NEON/kernels/arm_conv/depthwise/kernels/a64_fp32_nhwc_5x5_s1_output2x2_mla_depthfirst/generic_indirect.cpp +++ b/src/core/NEON/kernels/arm_conv/depthwise/kernels/a64_fp32_nhwc_5x5_s1_output2x2_mla_depthfirst/generic_indirect.cpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2021, 2023 Arm Limited. + * Copyright (c) 2021, 2023-2024 Arm Limited. * * SPDX-License-Identifier: MIT * diff --git a/src/core/NEON/kernels/arm_conv/depthwise/kernels/a64_fp32_nhwc_generic_output9_mla_depthfirst/generic.cpp b/src/core/NEON/kernels/arm_conv/depthwise/kernels/a64_fp32_nhwc_generic_output9_mla_depthfirst/generic.cpp index a2f577784f..ef9f15f97b 100644 --- a/src/core/NEON/kernels/arm_conv/depthwise/kernels/a64_fp32_nhwc_generic_output9_mla_depthfirst/generic.cpp +++ b/src/core/NEON/kernels/arm_conv/depthwise/kernels/a64_fp32_nhwc_generic_output9_mla_depthfirst/generic.cpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2021, 2023 Arm Limited. + * Copyright (c) 2021, 2023-2024 Arm Limited. 
* * SPDX-License-Identifier: MIT * diff --git a/src/core/NEON/kernels/arm_conv/depthwise/kernels/a64_fp32_packed_to_nhwc_3x3_s2_with_multiplier_output3x3_mla_depthfirst/generic.cpp b/src/core/NEON/kernels/arm_conv/depthwise/kernels/a64_fp32_packed_to_nhwc_3x3_s2_with_multiplier_output3x3_mla_depthfirst/generic.cpp index 9cafd23fb8..b220a6c735 100644 --- a/src/core/NEON/kernels/arm_conv/depthwise/kernels/a64_fp32_packed_to_nhwc_3x3_s2_with_multiplier_output3x3_mla_depthfirst/generic.cpp +++ b/src/core/NEON/kernels/arm_conv/depthwise/kernels/a64_fp32_packed_to_nhwc_3x3_s2_with_multiplier_output3x3_mla_depthfirst/generic.cpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2021, 2023 Arm Limited. + * Copyright (c) 2021, 2023-2024 Arm Limited. * * SPDX-License-Identifier: MIT * diff --git a/src/core/NEON/kernels/arm_conv/depthwise/kernels/a64_fp32_packed_to_nhwc_5x5_s1_with_multiplier_output2x4_mla_depthfirst/generic.cpp b/src/core/NEON/kernels/arm_conv/depthwise/kernels/a64_fp32_packed_to_nhwc_5x5_s1_with_multiplier_output2x4_mla_depthfirst/generic.cpp index c9bb1f41da..fed5ba7b63 100644 --- a/src/core/NEON/kernels/arm_conv/depthwise/kernels/a64_fp32_packed_to_nhwc_5x5_s1_with_multiplier_output2x4_mla_depthfirst/generic.cpp +++ b/src/core/NEON/kernels/arm_conv/depthwise/kernels/a64_fp32_packed_to_nhwc_5x5_s1_with_multiplier_output2x4_mla_depthfirst/generic.cpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2021, 2023 Arm Limited. + * Copyright (c) 2021, 2023-2024 Arm Limited. 
* * SPDX-License-Identifier: MIT * diff --git a/src/core/NEON/kernels/arm_conv/depthwise/kernels/a64_fp32_packed_to_nhwc_generic_with_multiplier_output2x8_mla_depthfirst/generic.cpp b/src/core/NEON/kernels/arm_conv/depthwise/kernels/a64_fp32_packed_to_nhwc_generic_with_multiplier_output2x8_mla_depthfirst/generic.cpp index cc18dd4bb4..eb8dd080a9 100644 --- a/src/core/NEON/kernels/arm_conv/depthwise/kernels/a64_fp32_packed_to_nhwc_generic_with_multiplier_output2x8_mla_depthfirst/generic.cpp +++ b/src/core/NEON/kernels/arm_conv/depthwise/kernels/a64_fp32_packed_to_nhwc_generic_with_multiplier_output2x8_mla_depthfirst/generic.cpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2021, 2023 Arm Limited. + * Copyright (c) 2021, 2023-2024 Arm Limited. * * SPDX-License-Identifier: MIT * diff --git a/src/core/NEON/kernels/arm_conv/depthwise/kernels/a64_s8q_nhwc_3x3_s1_output2x2_dot_depthfirst/generic.cpp b/src/core/NEON/kernels/arm_conv/depthwise/kernels/a64_s8q_nhwc_3x3_s1_output2x2_dot_depthfirst/generic.cpp index 916c8a4afe..4b8c4531f9 100644 --- a/src/core/NEON/kernels/arm_conv/depthwise/kernels/a64_s8q_nhwc_3x3_s1_output2x2_dot_depthfirst/generic.cpp +++ b/src/core/NEON/kernels/arm_conv/depthwise/kernels/a64_s8q_nhwc_3x3_s1_output2x2_dot_depthfirst/generic.cpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2021-2023 Arm Limited. + * Copyright (c) 2021-2024 Arm Limited. * * SPDX-License-Identifier: MIT * diff --git a/src/core/NEON/kernels/arm_conv/depthwise/kernels/a64_s8q_nhwc_generic_output9_mla_depthfirst/generic.cpp b/src/core/NEON/kernels/arm_conv/depthwise/kernels/a64_s8q_nhwc_generic_output9_mla_depthfirst/generic.cpp index 77b7d231e0..591a7368b6 100644 --- a/src/core/NEON/kernels/arm_conv/depthwise/kernels/a64_s8q_nhwc_generic_output9_mla_depthfirst/generic.cpp +++ b/src/core/NEON/kernels/arm_conv/depthwise/kernels/a64_s8q_nhwc_generic_output9_mla_depthfirst/generic.cpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2021, 2023 Arm Limited. + * Copyright (c) 2021, 2023-2024 Arm Limited. 
* * SPDX-License-Identifier: MIT * diff --git a/src/core/NEON/kernels/arm_conv/depthwise/kernels/a64_s8q_packed_to_nhwc_3x3_s2_with_multiplier_output2x4_dot_depthfirst/generic.cpp b/src/core/NEON/kernels/arm_conv/depthwise/kernels/a64_s8q_packed_to_nhwc_3x3_s2_with_multiplier_output2x4_dot_depthfirst/generic.cpp index be8fbfa0e2..2849bb6261 100644 --- a/src/core/NEON/kernels/arm_conv/depthwise/kernels/a64_s8q_packed_to_nhwc_3x3_s2_with_multiplier_output2x4_dot_depthfirst/generic.cpp +++ b/src/core/NEON/kernels/arm_conv/depthwise/kernels/a64_s8q_packed_to_nhwc_3x3_s2_with_multiplier_output2x4_dot_depthfirst/generic.cpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2021, 2023 Arm Limited. + * Copyright (c) 2021, 2023-2024 Arm Limited. * * SPDX-License-Identifier: MIT * diff --git a/src/core/NEON/kernels/arm_conv/depthwise/kernels/a64_s8q_packed_to_nhwc_5x5_s1_with_multiplier_output4x2_dot_depthfirst/generic.cpp b/src/core/NEON/kernels/arm_conv/depthwise/kernels/a64_s8q_packed_to_nhwc_5x5_s1_with_multiplier_output4x2_dot_depthfirst/generic.cpp index 17afc92e30..f8f88cd1ae 100644 --- a/src/core/NEON/kernels/arm_conv/depthwise/kernels/a64_s8q_packed_to_nhwc_5x5_s1_with_multiplier_output4x2_dot_depthfirst/generic.cpp +++ b/src/core/NEON/kernels/arm_conv/depthwise/kernels/a64_s8q_packed_to_nhwc_5x5_s1_with_multiplier_output4x2_dot_depthfirst/generic.cpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2021, 2023 Arm Limited. + * Copyright (c) 2021, 2023-2024 Arm Limited. 
* * SPDX-License-Identifier: MIT * diff --git a/src/core/NEON/kernels/arm_conv/depthwise/kernels/a64_s8q_packed_to_nhwc_generic_with_multiplier_output2x8_mla_depthfirst/generic.cpp b/src/core/NEON/kernels/arm_conv/depthwise/kernels/a64_s8q_packed_to_nhwc_generic_with_multiplier_output2x8_mla_depthfirst/generic.cpp index b21ad484e5..59eb11649b 100644 --- a/src/core/NEON/kernels/arm_conv/depthwise/kernels/a64_s8q_packed_to_nhwc_generic_with_multiplier_output2x8_mla_depthfirst/generic.cpp +++ b/src/core/NEON/kernels/arm_conv/depthwise/kernels/a64_s8q_packed_to_nhwc_generic_with_multiplier_output2x8_mla_depthfirst/generic.cpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2021, 2023 Arm Limited. + * Copyright (c) 2021, 2023-2024 Arm Limited. * * SPDX-License-Identifier: MIT * diff --git a/src/core/NEON/kernels/arm_conv/depthwise/kernels/a64_s8qs_nhwc_3x3_s1_output2x2_dot_depthfirst/generic.cpp b/src/core/NEON/kernels/arm_conv/depthwise/kernels/a64_s8qs_nhwc_3x3_s1_output2x2_dot_depthfirst/generic.cpp index aad34c4c25..225d39f9a3 100644 --- a/src/core/NEON/kernels/arm_conv/depthwise/kernels/a64_s8qs_nhwc_3x3_s1_output2x2_dot_depthfirst/generic.cpp +++ b/src/core/NEON/kernels/arm_conv/depthwise/kernels/a64_s8qs_nhwc_3x3_s1_output2x2_dot_depthfirst/generic.cpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2021-2023 Arm Limited. + * Copyright (c) 2021-2024 Arm Limited. * * SPDX-License-Identifier: MIT * diff --git a/src/core/NEON/kernels/arm_conv/depthwise/kernels/a64_u8q_nhwc_3x3_s1_output2x2_dot_depthfirst/generic.cpp b/src/core/NEON/kernels/arm_conv/depthwise/kernels/a64_u8q_nhwc_3x3_s1_output2x2_dot_depthfirst/generic.cpp index 5a28daffbf..877b2a2a66 100644 --- a/src/core/NEON/kernels/arm_conv/depthwise/kernels/a64_u8q_nhwc_3x3_s1_output2x2_dot_depthfirst/generic.cpp +++ b/src/core/NEON/kernels/arm_conv/depthwise/kernels/a64_u8q_nhwc_3x3_s1_output2x2_dot_depthfirst/generic.cpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2021-2023 Arm Limited. + * Copyright (c) 2021-2024 Arm Limited. 
* * SPDX-License-Identifier: MIT * diff --git a/src/core/NEON/kernels/arm_conv/depthwise/kernels/a64_u8q_nhwc_generic_output9_mla_depthfirst/generic.cpp b/src/core/NEON/kernels/arm_conv/depthwise/kernels/a64_u8q_nhwc_generic_output9_mla_depthfirst/generic.cpp index f7aa889b56..53b5d66ac9 100644 --- a/src/core/NEON/kernels/arm_conv/depthwise/kernels/a64_u8q_nhwc_generic_output9_mla_depthfirst/generic.cpp +++ b/src/core/NEON/kernels/arm_conv/depthwise/kernels/a64_u8q_nhwc_generic_output9_mla_depthfirst/generic.cpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2021, 2023 Arm Limited. + * Copyright (c) 2021, 2023-2024 Arm Limited. * * SPDX-License-Identifier: MIT * diff --git a/src/core/NEON/kernels/arm_conv/depthwise/kernels/a64_u8q_packed_to_nhwc_3x3_s2_with_multiplier_output2x4_dot_depthfirst/generic.cpp b/src/core/NEON/kernels/arm_conv/depthwise/kernels/a64_u8q_packed_to_nhwc_3x3_s2_with_multiplier_output2x4_dot_depthfirst/generic.cpp index d69f391514..95b7ad9b54 100644 --- a/src/core/NEON/kernels/arm_conv/depthwise/kernels/a64_u8q_packed_to_nhwc_3x3_s2_with_multiplier_output2x4_dot_depthfirst/generic.cpp +++ b/src/core/NEON/kernels/arm_conv/depthwise/kernels/a64_u8q_packed_to_nhwc_3x3_s2_with_multiplier_output2x4_dot_depthfirst/generic.cpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2021, 2023 Arm Limited. + * Copyright (c) 2021, 2023-2024 Arm Limited. 
* * SPDX-License-Identifier: MIT * diff --git a/src/core/NEON/kernels/arm_conv/depthwise/kernels/a64_u8q_packed_to_nhwc_5x5_s1_with_multiplier_output4x2_dot_depthfirst/generic.cpp b/src/core/NEON/kernels/arm_conv/depthwise/kernels/a64_u8q_packed_to_nhwc_5x5_s1_with_multiplier_output4x2_dot_depthfirst/generic.cpp index 61cec2b66d..9bf327adf8 100644 --- a/src/core/NEON/kernels/arm_conv/depthwise/kernels/a64_u8q_packed_to_nhwc_5x5_s1_with_multiplier_output4x2_dot_depthfirst/generic.cpp +++ b/src/core/NEON/kernels/arm_conv/depthwise/kernels/a64_u8q_packed_to_nhwc_5x5_s1_with_multiplier_output4x2_dot_depthfirst/generic.cpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2021, 2023 Arm Limited. + * Copyright (c) 2021, 2023-2024 Arm Limited. * * SPDX-License-Identifier: MIT * diff --git a/src/core/NEON/kernels/arm_conv/depthwise/kernels/a64_u8q_packed_to_nhwc_generic_with_multiplier_output2x8_mla_depthfirst/generic.cpp b/src/core/NEON/kernels/arm_conv/depthwise/kernels/a64_u8q_packed_to_nhwc_generic_with_multiplier_output2x8_mla_depthfirst/generic.cpp index 0770c126ec..bfe0f2aca8 100644 --- a/src/core/NEON/kernels/arm_conv/depthwise/kernels/a64_u8q_packed_to_nhwc_generic_with_multiplier_output2x8_mla_depthfirst/generic.cpp +++ b/src/core/NEON/kernels/arm_conv/depthwise/kernels/a64_u8q_packed_to_nhwc_generic_with_multiplier_output2x8_mla_depthfirst/generic.cpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2021, 2023 Arm Limited. + * Copyright (c) 2021, 2023-2024 Arm Limited. 
* * SPDX-License-Identifier: MIT * diff --git a/src/core/NEON/kernels/arm_conv/depthwise/kernels/a64_u8qa_nhwc_3x3_s1_output2x2_mla_depthfirst/generic.cpp b/src/core/NEON/kernels/arm_conv/depthwise/kernels/a64_u8qa_nhwc_3x3_s1_output2x2_mla_depthfirst/generic.cpp index d1872c90f8..6340e861e6 100644 --- a/src/core/NEON/kernels/arm_conv/depthwise/kernels/a64_u8qa_nhwc_3x3_s1_output2x2_mla_depthfirst/generic.cpp +++ b/src/core/NEON/kernels/arm_conv/depthwise/kernels/a64_u8qa_nhwc_3x3_s1_output2x2_mla_depthfirst/generic.cpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2022-2023 Arm Limited. + * Copyright (c) 2022-2024 Arm Limited. * * SPDX-License-Identifier: MIT * diff --git a/src/core/NEON/kernels/arm_conv/depthwise/kernels/a64_u8s8u8q_nhwc_5x5_s1_output2x2_mla_depthfirst/generic.cpp b/src/core/NEON/kernels/arm_conv/depthwise/kernels/a64_u8s8u8q_nhwc_5x5_s1_output2x2_mla_depthfirst/generic.cpp index df955206e2..cb7e83c674 100644 --- a/src/core/NEON/kernels/arm_conv/depthwise/kernels/a64_u8s8u8q_nhwc_5x5_s1_output2x2_mla_depthfirst/generic.cpp +++ b/src/core/NEON/kernels/arm_conv/depthwise/kernels/a64_u8s8u8q_nhwc_5x5_s1_output2x2_mla_depthfirst/generic.cpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2021-2023 Arm Limited. + * Copyright (c) 2021-2024 Arm Limited. * * SPDX-License-Identifier: MIT * diff --git a/src/core/NEON/kernels/arm_conv/depthwise/kernels/a64_u8s8u8q_nhwc_generic_output9_mla_depthfirst/generic.cpp b/src/core/NEON/kernels/arm_conv/depthwise/kernels/a64_u8s8u8q_nhwc_generic_output9_mla_depthfirst/generic.cpp index c2bec4cdab..12e44c56ea 100644 --- a/src/core/NEON/kernels/arm_conv/depthwise/kernels/a64_u8s8u8q_nhwc_generic_output9_mla_depthfirst/generic.cpp +++ b/src/core/NEON/kernels/arm_conv/depthwise/kernels/a64_u8s8u8q_nhwc_generic_output9_mla_depthfirst/generic.cpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2021, 2023 Arm Limited. + * Copyright (c) 2021, 2023-2024 Arm Limited. 
* * SPDX-License-Identifier: MIT * diff --git a/src/core/NEON/kernels/arm_conv/depthwise/kernels/a64_u8s8u8q_packed_to_nhwc_generic_with_multiplier_output2x8_mla_depthfirst/generic.cpp b/src/core/NEON/kernels/arm_conv/depthwise/kernels/a64_u8s8u8q_packed_to_nhwc_generic_with_multiplier_output2x8_mla_depthfirst/generic.cpp index ed99f1f642..baad8ee3e7 100644 --- a/src/core/NEON/kernels/arm_conv/depthwise/kernels/a64_u8s8u8q_packed_to_nhwc_generic_with_multiplier_output2x8_mla_depthfirst/generic.cpp +++ b/src/core/NEON/kernels/arm_conv/depthwise/kernels/a64_u8s8u8q_packed_to_nhwc_generic_with_multiplier_output2x8_mla_depthfirst/generic.cpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2021, 2023 Arm Limited. + * Copyright (c) 2021, 2023-2024 Arm Limited. * * SPDX-License-Identifier: MIT * diff --git a/src/core/NEON/kernels/arm_conv/depthwise/kernels/sme2_fp16_nhwc_3x3_s1_output2x2_mla_depthfirst.hpp b/src/core/NEON/kernels/arm_conv/depthwise/kernels/sme2_fp16_nhwc_3x3_s1_output2x2_mla_depthfirst.hpp index 2b6f70c089..14ad12676d 100644 --- a/src/core/NEON/kernels/arm_conv/depthwise/kernels/sme2_fp16_nhwc_3x3_s1_output2x2_mla_depthfirst.hpp +++ b/src/core/NEON/kernels/arm_conv/depthwise/kernels/sme2_fp16_nhwc_3x3_s1_output2x2_mla_depthfirst.hpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2023 Arm Limited. + * Copyright (c) 2023-2024 Arm Limited. * * SPDX-License-Identifier: MIT * diff --git a/src/core/NEON/kernels/arm_conv/depthwise/kernels/sme2_fp16_nhwc_3x3_s1_output2x2_mla_depthfirst/generic_direct.cpp b/src/core/NEON/kernels/arm_conv/depthwise/kernels/sme2_fp16_nhwc_3x3_s1_output2x2_mla_depthfirst/generic_direct.cpp index 2d558ade3f..5be29aabc0 100644 --- a/src/core/NEON/kernels/arm_conv/depthwise/kernels/sme2_fp16_nhwc_3x3_s1_output2x2_mla_depthfirst/generic_direct.cpp +++ b/src/core/NEON/kernels/arm_conv/depthwise/kernels/sme2_fp16_nhwc_3x3_s1_output2x2_mla_depthfirst/generic_direct.cpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2023 Arm Limited. 
+ * Copyright (c) 2023-2024 Arm Limited. * * SPDX-License-Identifier: MIT * diff --git a/src/core/NEON/kernels/arm_conv/depthwise/kernels/sme2_fp16_nhwc_3x3_s1_output2x2_mla_depthfirst/generic_indirect.cpp b/src/core/NEON/kernels/arm_conv/depthwise/kernels/sme2_fp16_nhwc_3x3_s1_output2x2_mla_depthfirst/generic_indirect.cpp index 415e344832..af7c3a9677 100644 --- a/src/core/NEON/kernels/arm_conv/depthwise/kernels/sme2_fp16_nhwc_3x3_s1_output2x2_mla_depthfirst/generic_indirect.cpp +++ b/src/core/NEON/kernels/arm_conv/depthwise/kernels/sme2_fp16_nhwc_3x3_s1_output2x2_mla_depthfirst/generic_indirect.cpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2023 Arm Limited. + * Copyright (c) 2023-2024 Arm Limited. * * SPDX-License-Identifier: MIT * diff --git a/src/core/NEON/kernels/arm_conv/depthwise/kernels/sme2_fp16_nhwc_3x3_s1_output3x3_mla_depthfirst.hpp b/src/core/NEON/kernels/arm_conv/depthwise/kernels/sme2_fp16_nhwc_3x3_s1_output3x3_mla_depthfirst.hpp index f90fbc3906..b553abbdfe 100644 --- a/src/core/NEON/kernels/arm_conv/depthwise/kernels/sme2_fp16_nhwc_3x3_s1_output3x3_mla_depthfirst.hpp +++ b/src/core/NEON/kernels/arm_conv/depthwise/kernels/sme2_fp16_nhwc_3x3_s1_output3x3_mla_depthfirst.hpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2023 Arm Limited. + * Copyright (c) 2023-2024 Arm Limited. * * SPDX-License-Identifier: MIT * diff --git a/src/core/NEON/kernels/arm_conv/depthwise/kernels/sme2_fp16_nhwc_3x3_s1_output3x3_mla_depthfirst/generic_direct.cpp b/src/core/NEON/kernels/arm_conv/depthwise/kernels/sme2_fp16_nhwc_3x3_s1_output3x3_mla_depthfirst/generic_direct.cpp index 3a7d1cb0b4..d911e83f6e 100644 --- a/src/core/NEON/kernels/arm_conv/depthwise/kernels/sme2_fp16_nhwc_3x3_s1_output3x3_mla_depthfirst/generic_direct.cpp +++ b/src/core/NEON/kernels/arm_conv/depthwise/kernels/sme2_fp16_nhwc_3x3_s1_output3x3_mla_depthfirst/generic_direct.cpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2023 Arm Limited. + * Copyright (c) 2023-2024 Arm Limited. 
* * SPDX-License-Identifier: MIT * diff --git a/src/core/NEON/kernels/arm_conv/depthwise/kernels/sme2_fp16_nhwc_3x3_s1_output3x3_mla_depthfirst/generic_indirect.cpp b/src/core/NEON/kernels/arm_conv/depthwise/kernels/sme2_fp16_nhwc_3x3_s1_output3x3_mla_depthfirst/generic_indirect.cpp index e85cb9e017..cad3fe5daf 100644 --- a/src/core/NEON/kernels/arm_conv/depthwise/kernels/sme2_fp16_nhwc_3x3_s1_output3x3_mla_depthfirst/generic_indirect.cpp +++ b/src/core/NEON/kernels/arm_conv/depthwise/kernels/sme2_fp16_nhwc_3x3_s1_output3x3_mla_depthfirst/generic_indirect.cpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2023 Arm Limited. + * Copyright (c) 2023-2024 Arm Limited. * * SPDX-License-Identifier: MIT * diff --git a/src/core/NEON/kernels/arm_conv/depthwise/kernels/sme2_fp16_nhwc_3x3_s1_output4x4_mla_depthfirst.hpp b/src/core/NEON/kernels/arm_conv/depthwise/kernels/sme2_fp16_nhwc_3x3_s1_output4x4_mla_depthfirst.hpp index 6b75d12295..00e1d0eeed 100644 --- a/src/core/NEON/kernels/arm_conv/depthwise/kernels/sme2_fp16_nhwc_3x3_s1_output4x4_mla_depthfirst.hpp +++ b/src/core/NEON/kernels/arm_conv/depthwise/kernels/sme2_fp16_nhwc_3x3_s1_output4x4_mla_depthfirst.hpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2023 Arm Limited. + * Copyright (c) 2023-2024 Arm Limited. * * SPDX-License-Identifier: MIT * diff --git a/src/core/NEON/kernels/arm_conv/depthwise/kernels/sme2_fp16_nhwc_3x3_s1_output4x4_mla_depthfirst/generic_direct.cpp b/src/core/NEON/kernels/arm_conv/depthwise/kernels/sme2_fp16_nhwc_3x3_s1_output4x4_mla_depthfirst/generic_direct.cpp index 37a9febf47..9d78ff3320 100644 --- a/src/core/NEON/kernels/arm_conv/depthwise/kernels/sme2_fp16_nhwc_3x3_s1_output4x4_mla_depthfirst/generic_direct.cpp +++ b/src/core/NEON/kernels/arm_conv/depthwise/kernels/sme2_fp16_nhwc_3x3_s1_output4x4_mla_depthfirst/generic_direct.cpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2023 Arm Limited. + * Copyright (c) 2023-2024 Arm Limited. 
* * SPDX-License-Identifier: MIT * diff --git a/src/core/NEON/kernels/arm_conv/depthwise/kernels/sme2_fp16_nhwc_3x3_s1_output4x4_mla_depthfirst/generic_indirect.cpp b/src/core/NEON/kernels/arm_conv/depthwise/kernels/sme2_fp16_nhwc_3x3_s1_output4x4_mla_depthfirst/generic_indirect.cpp index 2e6f1123a4..33a12a50de 100644 --- a/src/core/NEON/kernels/arm_conv/depthwise/kernels/sme2_fp16_nhwc_3x3_s1_output4x4_mla_depthfirst/generic_indirect.cpp +++ b/src/core/NEON/kernels/arm_conv/depthwise/kernels/sme2_fp16_nhwc_3x3_s1_output4x4_mla_depthfirst/generic_indirect.cpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2023 Arm Limited. + * Copyright (c) 2023-2024 Arm Limited. * * SPDX-License-Identifier: MIT * diff --git a/src/core/NEON/kernels/arm_conv/depthwise/kernels/sme2_fp16_nhwc_3x3_s2_output2x2_mla_depthfirst.hpp b/src/core/NEON/kernels/arm_conv/depthwise/kernels/sme2_fp16_nhwc_3x3_s2_output2x2_mla_depthfirst.hpp index 27fcb2e6d2..6a2508cdcb 100644 --- a/src/core/NEON/kernels/arm_conv/depthwise/kernels/sme2_fp16_nhwc_3x3_s2_output2x2_mla_depthfirst.hpp +++ b/src/core/NEON/kernels/arm_conv/depthwise/kernels/sme2_fp16_nhwc_3x3_s2_output2x2_mla_depthfirst.hpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2023 Arm Limited. + * Copyright (c) 2023-2024 Arm Limited. * * SPDX-License-Identifier: MIT * diff --git a/src/core/NEON/kernels/arm_conv/depthwise/kernels/sme2_fp16_nhwc_3x3_s2_output2x2_mla_depthfirst/generic_direct.cpp b/src/core/NEON/kernels/arm_conv/depthwise/kernels/sme2_fp16_nhwc_3x3_s2_output2x2_mla_depthfirst/generic_direct.cpp index 066ce06aa6..2362722201 100644 --- a/src/core/NEON/kernels/arm_conv/depthwise/kernels/sme2_fp16_nhwc_3x3_s2_output2x2_mla_depthfirst/generic_direct.cpp +++ b/src/core/NEON/kernels/arm_conv/depthwise/kernels/sme2_fp16_nhwc_3x3_s2_output2x2_mla_depthfirst/generic_direct.cpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2023 Arm Limited. + * Copyright (c) 2023-2024 Arm Limited. 
* * SPDX-License-Identifier: MIT * diff --git a/src/core/NEON/kernels/arm_conv/depthwise/kernels/sme2_fp16_nhwc_3x3_s2_output2x2_mla_depthfirst/generic_indirect.cpp b/src/core/NEON/kernels/arm_conv/depthwise/kernels/sme2_fp16_nhwc_3x3_s2_output2x2_mla_depthfirst/generic_indirect.cpp index 1bf3a84959..6b52df7d43 100644 --- a/src/core/NEON/kernels/arm_conv/depthwise/kernels/sme2_fp16_nhwc_3x3_s2_output2x2_mla_depthfirst/generic_indirect.cpp +++ b/src/core/NEON/kernels/arm_conv/depthwise/kernels/sme2_fp16_nhwc_3x3_s2_output2x2_mla_depthfirst/generic_indirect.cpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2023 Arm Limited. + * Copyright (c) 2023-2024 Arm Limited. * * SPDX-License-Identifier: MIT * diff --git a/src/core/NEON/kernels/arm_conv/depthwise/kernels/sme2_fp16_nhwc_5x5_s1_output2x2_mla_depthfirst.hpp b/src/core/NEON/kernels/arm_conv/depthwise/kernels/sme2_fp16_nhwc_5x5_s1_output2x2_mla_depthfirst.hpp index 84263cb564..2defe1833b 100644 --- a/src/core/NEON/kernels/arm_conv/depthwise/kernels/sme2_fp16_nhwc_5x5_s1_output2x2_mla_depthfirst.hpp +++ b/src/core/NEON/kernels/arm_conv/depthwise/kernels/sme2_fp16_nhwc_5x5_s1_output2x2_mla_depthfirst.hpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2023 Arm Limited. + * Copyright (c) 2023-2024 Arm Limited. * * SPDX-License-Identifier: MIT * diff --git a/src/core/NEON/kernels/arm_conv/depthwise/kernels/sme2_fp16_nhwc_5x5_s1_output2x2_mla_depthfirst/generic_direct.cpp b/src/core/NEON/kernels/arm_conv/depthwise/kernels/sme2_fp16_nhwc_5x5_s1_output2x2_mla_depthfirst/generic_direct.cpp index 58b7824b98..b0e5406014 100644 --- a/src/core/NEON/kernels/arm_conv/depthwise/kernels/sme2_fp16_nhwc_5x5_s1_output2x2_mla_depthfirst/generic_direct.cpp +++ b/src/core/NEON/kernels/arm_conv/depthwise/kernels/sme2_fp16_nhwc_5x5_s1_output2x2_mla_depthfirst/generic_direct.cpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2023 Arm Limited. + * Copyright (c) 2023-2024 Arm Limited. 
* * SPDX-License-Identifier: MIT * diff --git a/src/core/NEON/kernels/arm_conv/depthwise/kernels/sme2_fp16_nhwc_5x5_s1_output2x2_mla_depthfirst/generic_indirect.cpp b/src/core/NEON/kernels/arm_conv/depthwise/kernels/sme2_fp16_nhwc_5x5_s1_output2x2_mla_depthfirst/generic_indirect.cpp index 313036876e..04355ce584 100644 --- a/src/core/NEON/kernels/arm_conv/depthwise/kernels/sme2_fp16_nhwc_5x5_s1_output2x2_mla_depthfirst/generic_indirect.cpp +++ b/src/core/NEON/kernels/arm_conv/depthwise/kernels/sme2_fp16_nhwc_5x5_s1_output2x2_mla_depthfirst/generic_indirect.cpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2023 Arm Limited. + * Copyright (c) 2023-2024 Arm Limited. * * SPDX-License-Identifier: MIT * diff --git a/src/core/NEON/kernels/arm_conv/depthwise/kernels/sme2_fp32_nhwc_3x3_s1_output2x2_mla_depthfirst/generic_direct.cpp b/src/core/NEON/kernels/arm_conv/depthwise/kernels/sme2_fp32_nhwc_3x3_s1_output2x2_mla_depthfirst/generic_direct.cpp index 96cfd5e497..27c11892ce 100644 --- a/src/core/NEON/kernels/arm_conv/depthwise/kernels/sme2_fp32_nhwc_3x3_s1_output2x2_mla_depthfirst/generic_direct.cpp +++ b/src/core/NEON/kernels/arm_conv/depthwise/kernels/sme2_fp32_nhwc_3x3_s1_output2x2_mla_depthfirst/generic_direct.cpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2022-2023 Arm Limited. + * Copyright (c) 2022-2024 Arm Limited. * * SPDX-License-Identifier: MIT * diff --git a/src/core/NEON/kernels/arm_conv/depthwise/kernels/sme2_fp32_nhwc_3x3_s1_output2x2_mla_depthfirst/generic_indirect.cpp b/src/core/NEON/kernels/arm_conv/depthwise/kernels/sme2_fp32_nhwc_3x3_s1_output2x2_mla_depthfirst/generic_indirect.cpp index 39f1b3635f..5f54bae1ab 100644 --- a/src/core/NEON/kernels/arm_conv/depthwise/kernels/sme2_fp32_nhwc_3x3_s1_output2x2_mla_depthfirst/generic_indirect.cpp +++ b/src/core/NEON/kernels/arm_conv/depthwise/kernels/sme2_fp32_nhwc_3x3_s1_output2x2_mla_depthfirst/generic_indirect.cpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2022-2023 Arm Limited. + * Copyright (c) 2022-2024 Arm Limited. 
* * SPDX-License-Identifier: MIT * diff --git a/src/core/NEON/kernels/arm_conv/depthwise/kernels/sme2_fp32_nhwc_3x3_s1_output3x3_mla_depthfirst/generic_direct.cpp b/src/core/NEON/kernels/arm_conv/depthwise/kernels/sme2_fp32_nhwc_3x3_s1_output3x3_mla_depthfirst/generic_direct.cpp index d15a3a8377..9d7e351221 100644 --- a/src/core/NEON/kernels/arm_conv/depthwise/kernels/sme2_fp32_nhwc_3x3_s1_output3x3_mla_depthfirst/generic_direct.cpp +++ b/src/core/NEON/kernels/arm_conv/depthwise/kernels/sme2_fp32_nhwc_3x3_s1_output3x3_mla_depthfirst/generic_direct.cpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2022-2023 Arm Limited. + * Copyright (c) 2022-2024 Arm Limited. * * SPDX-License-Identifier: MIT * diff --git a/src/core/NEON/kernels/arm_conv/depthwise/kernels/sme2_fp32_nhwc_3x3_s1_output3x3_mla_depthfirst/generic_indirect.cpp b/src/core/NEON/kernels/arm_conv/depthwise/kernels/sme2_fp32_nhwc_3x3_s1_output3x3_mla_depthfirst/generic_indirect.cpp index 2c868b6cf3..177b174aaf 100644 --- a/src/core/NEON/kernels/arm_conv/depthwise/kernels/sme2_fp32_nhwc_3x3_s1_output3x3_mla_depthfirst/generic_indirect.cpp +++ b/src/core/NEON/kernels/arm_conv/depthwise/kernels/sme2_fp32_nhwc_3x3_s1_output3x3_mla_depthfirst/generic_indirect.cpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2022-2023 Arm Limited. + * Copyright (c) 2022-2024 Arm Limited. * * SPDX-License-Identifier: MIT * diff --git a/src/core/NEON/kernels/arm_conv/depthwise/kernels/sme2_fp32_nhwc_3x3_s1_output4x4_mla_depthfirst/generic_direct.cpp b/src/core/NEON/kernels/arm_conv/depthwise/kernels/sme2_fp32_nhwc_3x3_s1_output4x4_mla_depthfirst/generic_direct.cpp index efd37c38ec..99a3aee669 100644 --- a/src/core/NEON/kernels/arm_conv/depthwise/kernels/sme2_fp32_nhwc_3x3_s1_output4x4_mla_depthfirst/generic_direct.cpp +++ b/src/core/NEON/kernels/arm_conv/depthwise/kernels/sme2_fp32_nhwc_3x3_s1_output4x4_mla_depthfirst/generic_direct.cpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2022-2023 Arm Limited. + * Copyright (c) 2022-2024 Arm Limited. 
* * SPDX-License-Identifier: MIT * diff --git a/src/core/NEON/kernels/arm_conv/depthwise/kernels/sme2_fp32_nhwc_3x3_s1_output4x4_mla_depthfirst/generic_indirect.cpp b/src/core/NEON/kernels/arm_conv/depthwise/kernels/sme2_fp32_nhwc_3x3_s1_output4x4_mla_depthfirst/generic_indirect.cpp index 2e2a45bab0..c9ba28cd77 100644 --- a/src/core/NEON/kernels/arm_conv/depthwise/kernels/sme2_fp32_nhwc_3x3_s1_output4x4_mla_depthfirst/generic_indirect.cpp +++ b/src/core/NEON/kernels/arm_conv/depthwise/kernels/sme2_fp32_nhwc_3x3_s1_output4x4_mla_depthfirst/generic_indirect.cpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2022-2023 Arm Limited. + * Copyright (c) 2022-2024 Arm Limited. * * SPDX-License-Identifier: MIT * diff --git a/src/core/NEON/kernels/arm_conv/depthwise/kernels/sme2_fp32_nhwc_3x3_s2_output2x2_mla_depthfirst/generic_direct.cpp b/src/core/NEON/kernels/arm_conv/depthwise/kernels/sme2_fp32_nhwc_3x3_s2_output2x2_mla_depthfirst/generic_direct.cpp index 066b935486..964c50804c 100644 --- a/src/core/NEON/kernels/arm_conv/depthwise/kernels/sme2_fp32_nhwc_3x3_s2_output2x2_mla_depthfirst/generic_direct.cpp +++ b/src/core/NEON/kernels/arm_conv/depthwise/kernels/sme2_fp32_nhwc_3x3_s2_output2x2_mla_depthfirst/generic_direct.cpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2022-2023 Arm Limited. + * Copyright (c) 2022-2024 Arm Limited. * * SPDX-License-Identifier: MIT * diff --git a/src/core/NEON/kernels/arm_conv/depthwise/kernels/sme2_fp32_nhwc_3x3_s2_output2x2_mla_depthfirst/generic_indirect.cpp b/src/core/NEON/kernels/arm_conv/depthwise/kernels/sme2_fp32_nhwc_3x3_s2_output2x2_mla_depthfirst/generic_indirect.cpp index dc7a40ff54..4602ac16fa 100644 --- a/src/core/NEON/kernels/arm_conv/depthwise/kernels/sme2_fp32_nhwc_3x3_s2_output2x2_mla_depthfirst/generic_indirect.cpp +++ b/src/core/NEON/kernels/arm_conv/depthwise/kernels/sme2_fp32_nhwc_3x3_s2_output2x2_mla_depthfirst/generic_indirect.cpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2022-2023 Arm Limited. + * Copyright (c) 2022-2024 Arm Limited. 
* * SPDX-License-Identifier: MIT * diff --git a/src/core/NEON/kernels/arm_conv/depthwise/kernels/sme2_fp32_planar_3x3_s1_4rows_mla_za/generic.cpp b/src/core/NEON/kernels/arm_conv/depthwise/kernels/sme2_fp32_planar_3x3_s1_4rows_mla_za/generic.cpp index a385893146..769b5d486a 100644 --- a/src/core/NEON/kernels/arm_conv/depthwise/kernels/sme2_fp32_planar_3x3_s1_4rows_mla_za/generic.cpp +++ b/src/core/NEON/kernels/arm_conv/depthwise/kernels/sme2_fp32_planar_3x3_s1_4rows_mla_za/generic.cpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2022-2023 Arm Limited. + * Copyright (c) 2022-2024 Arm Limited. * * SPDX-License-Identifier: MIT * diff --git a/src/core/NEON/kernels/arm_conv/depthwise/kernels/sme2_fp32_planar_3x3_s2_4rows_mla_za/generic.cpp b/src/core/NEON/kernels/arm_conv/depthwise/kernels/sme2_fp32_planar_3x3_s2_4rows_mla_za/generic.cpp index 26315101b4..1e5b8742ac 100644 --- a/src/core/NEON/kernels/arm_conv/depthwise/kernels/sme2_fp32_planar_3x3_s2_4rows_mla_za/generic.cpp +++ b/src/core/NEON/kernels/arm_conv/depthwise/kernels/sme2_fp32_planar_3x3_s2_4rows_mla_za/generic.cpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2022-2023 Arm Limited. + * Copyright (c) 2022-2024 Arm Limited. * * SPDX-License-Identifier: MIT * diff --git a/src/core/NEON/kernels/arm_conv/depthwise/kernels/sme2_fp32_planar_5x5_s1_4rows_mla_za/generic.cpp b/src/core/NEON/kernels/arm_conv/depthwise/kernels/sme2_fp32_planar_5x5_s1_4rows_mla_za/generic.cpp index 3741b973b4..ff11131e8d 100644 --- a/src/core/NEON/kernels/arm_conv/depthwise/kernels/sme2_fp32_planar_5x5_s1_4rows_mla_za/generic.cpp +++ b/src/core/NEON/kernels/arm_conv/depthwise/kernels/sme2_fp32_planar_5x5_s1_4rows_mla_za/generic.cpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2022-2023 Arm Limited. + * Copyright (c) 2022-2024 Arm Limited. 
* * SPDX-License-Identifier: MIT * diff --git a/src/core/NEON/kernels/arm_conv/depthwise/kernels/sme2_fp32_planar_5x5_s2_4rows_mla_za/generic.cpp b/src/core/NEON/kernels/arm_conv/depthwise/kernels/sme2_fp32_planar_5x5_s2_4rows_mla_za/generic.cpp index 81ad8e5833..2325ec13e3 100644 --- a/src/core/NEON/kernels/arm_conv/depthwise/kernels/sme2_fp32_planar_5x5_s2_4rows_mla_za/generic.cpp +++ b/src/core/NEON/kernels/arm_conv/depthwise/kernels/sme2_fp32_planar_5x5_s2_4rows_mla_za/generic.cpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2022-2023 Arm Limited. + * Copyright (c) 2022-2024 Arm Limited. * * SPDX-License-Identifier: MIT * diff --git a/src/core/NEON/kernels/arm_conv/depthwise/kernels/sme2_fp32bf16fp32_planar_3x3_s1_4rows_dot_za/generic.cpp b/src/core/NEON/kernels/arm_conv/depthwise/kernels/sme2_fp32bf16fp32_planar_3x3_s1_4rows_dot_za/generic.cpp index be82e04613..502627a7c1 100644 --- a/src/core/NEON/kernels/arm_conv/depthwise/kernels/sme2_fp32bf16fp32_planar_3x3_s1_4rows_dot_za/generic.cpp +++ b/src/core/NEON/kernels/arm_conv/depthwise/kernels/sme2_fp32bf16fp32_planar_3x3_s1_4rows_dot_za/generic.cpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2022-2023 Arm Limited. + * Copyright (c) 2022-2024 Arm Limited. * * SPDX-License-Identifier: MIT * diff --git a/src/core/NEON/kernels/arm_conv/depthwise/kernels/sme2_fp32bf16fp32_planar_3x3_s2_4rows_dot_za/generic.cpp b/src/core/NEON/kernels/arm_conv/depthwise/kernels/sme2_fp32bf16fp32_planar_3x3_s2_4rows_dot_za/generic.cpp index a3b9ca402a..84e1c29542 100644 --- a/src/core/NEON/kernels/arm_conv/depthwise/kernels/sme2_fp32bf16fp32_planar_3x3_s2_4rows_dot_za/generic.cpp +++ b/src/core/NEON/kernels/arm_conv/depthwise/kernels/sme2_fp32bf16fp32_planar_3x3_s2_4rows_dot_za/generic.cpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2022-2023 Arm Limited. + * Copyright (c) 2022-2024 Arm Limited. 
* * SPDX-License-Identifier: MIT * diff --git a/src/core/NEON/kernels/arm_conv/depthwise/kernels/sme2_fp32bf16fp32_planar_5x5_s1_4rows_dot_za/generic.cpp b/src/core/NEON/kernels/arm_conv/depthwise/kernels/sme2_fp32bf16fp32_planar_5x5_s1_4rows_dot_za/generic.cpp index b72042558d..06c15093c3 100644 --- a/src/core/NEON/kernels/arm_conv/depthwise/kernels/sme2_fp32bf16fp32_planar_5x5_s1_4rows_dot_za/generic.cpp +++ b/src/core/NEON/kernels/arm_conv/depthwise/kernels/sme2_fp32bf16fp32_planar_5x5_s1_4rows_dot_za/generic.cpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2022-2023 Arm Limited. + * Copyright (c) 2022-2024 Arm Limited. * * SPDX-License-Identifier: MIT * diff --git a/src/core/NEON/kernels/arm_conv/depthwise/kernels/sme2_fp32bf16fp32_planar_5x5_s2_4rows_dot_za/generic.cpp b/src/core/NEON/kernels/arm_conv/depthwise/kernels/sme2_fp32bf16fp32_planar_5x5_s2_4rows_dot_za/generic.cpp index 3a56e69d26..65979d99f8 100644 --- a/src/core/NEON/kernels/arm_conv/depthwise/kernels/sme2_fp32bf16fp32_planar_5x5_s2_4rows_dot_za/generic.cpp +++ b/src/core/NEON/kernels/arm_conv/depthwise/kernels/sme2_fp32bf16fp32_planar_5x5_s2_4rows_dot_za/generic.cpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2022-2023 Arm Limited. + * Copyright (c) 2022-2024 Arm Limited. * * SPDX-License-Identifier: MIT * diff --git a/src/core/NEON/kernels/arm_conv/depthwise/kernels/sme2_s8q_planar_3x3_s1_4rows_dot_za/generic.cpp b/src/core/NEON/kernels/arm_conv/depthwise/kernels/sme2_s8q_planar_3x3_s1_4rows_dot_za/generic.cpp index 845f376926..00f82d9898 100644 --- a/src/core/NEON/kernels/arm_conv/depthwise/kernels/sme2_s8q_planar_3x3_s1_4rows_dot_za/generic.cpp +++ b/src/core/NEON/kernels/arm_conv/depthwise/kernels/sme2_s8q_planar_3x3_s1_4rows_dot_za/generic.cpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2022-2023 Arm Limited. + * Copyright (c) 2022-2024 Arm Limited. 
* * SPDX-License-Identifier: MIT * diff --git a/src/core/NEON/kernels/arm_conv/depthwise/kernels/sme2_s8q_planar_3x3_s2_4rows_dot_za/generic.cpp b/src/core/NEON/kernels/arm_conv/depthwise/kernels/sme2_s8q_planar_3x3_s2_4rows_dot_za/generic.cpp index 1d0efc6bc1..fdebaea83b 100644 --- a/src/core/NEON/kernels/arm_conv/depthwise/kernels/sme2_s8q_planar_3x3_s2_4rows_dot_za/generic.cpp +++ b/src/core/NEON/kernels/arm_conv/depthwise/kernels/sme2_s8q_planar_3x3_s2_4rows_dot_za/generic.cpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2022-2023 Arm Limited. + * Copyright (c) 2022-2024 Arm Limited. * * SPDX-License-Identifier: MIT * diff --git a/src/core/NEON/kernels/arm_conv/depthwise/kernels/sme2_s8q_planar_5x5_s1_4rows_dot_za/generic.cpp b/src/core/NEON/kernels/arm_conv/depthwise/kernels/sme2_s8q_planar_5x5_s1_4rows_dot_za/generic.cpp index bb68733a45..20c58bbc4a 100644 --- a/src/core/NEON/kernels/arm_conv/depthwise/kernels/sme2_s8q_planar_5x5_s1_4rows_dot_za/generic.cpp +++ b/src/core/NEON/kernels/arm_conv/depthwise/kernels/sme2_s8q_planar_5x5_s1_4rows_dot_za/generic.cpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2022-2023 Arm Limited. + * Copyright (c) 2022-2024 Arm Limited. * * SPDX-License-Identifier: MIT * diff --git a/src/core/NEON/kernels/arm_conv/depthwise/kernels/sme2_s8q_planar_5x5_s2_4rows_dot_za/generic.cpp b/src/core/NEON/kernels/arm_conv/depthwise/kernels/sme2_s8q_planar_5x5_s2_4rows_dot_za/generic.cpp index 3da0d14d74..ffe44e115e 100644 --- a/src/core/NEON/kernels/arm_conv/depthwise/kernels/sme2_s8q_planar_5x5_s2_4rows_dot_za/generic.cpp +++ b/src/core/NEON/kernels/arm_conv/depthwise/kernels/sme2_s8q_planar_5x5_s2_4rows_dot_za/generic.cpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2022-2023 Arm Limited. + * Copyright (c) 2022-2024 Arm Limited. 
* * SPDX-License-Identifier: MIT * diff --git a/src/core/NEON/kernels/arm_conv/depthwise/kernels/sme2_u8q_planar_3x3_s1_4rows_dot_za/generic.cpp b/src/core/NEON/kernels/arm_conv/depthwise/kernels/sme2_u8q_planar_3x3_s1_4rows_dot_za/generic.cpp index 60c3a1e632..a23903d684 100644 --- a/src/core/NEON/kernels/arm_conv/depthwise/kernels/sme2_u8q_planar_3x3_s1_4rows_dot_za/generic.cpp +++ b/src/core/NEON/kernels/arm_conv/depthwise/kernels/sme2_u8q_planar_3x3_s1_4rows_dot_za/generic.cpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2022-2023 Arm Limited. + * Copyright (c) 2022-2024 Arm Limited. * * SPDX-License-Identifier: MIT * diff --git a/src/core/NEON/kernels/arm_conv/depthwise/kernels/sme2_u8q_planar_3x3_s2_4rows_dot_za/generic.cpp b/src/core/NEON/kernels/arm_conv/depthwise/kernels/sme2_u8q_planar_3x3_s2_4rows_dot_za/generic.cpp index e4ce6c74fb..197a0e03c6 100644 --- a/src/core/NEON/kernels/arm_conv/depthwise/kernels/sme2_u8q_planar_3x3_s2_4rows_dot_za/generic.cpp +++ b/src/core/NEON/kernels/arm_conv/depthwise/kernels/sme2_u8q_planar_3x3_s2_4rows_dot_za/generic.cpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2022-2023 Arm Limited. + * Copyright (c) 2022-2024 Arm Limited. * * SPDX-License-Identifier: MIT * diff --git a/src/core/NEON/kernels/arm_conv/depthwise/kernels/sme2_u8q_planar_5x5_s1_4rows_dot_za/generic.cpp b/src/core/NEON/kernels/arm_conv/depthwise/kernels/sme2_u8q_planar_5x5_s1_4rows_dot_za/generic.cpp index d33ef764ef..e167f54f4b 100644 --- a/src/core/NEON/kernels/arm_conv/depthwise/kernels/sme2_u8q_planar_5x5_s1_4rows_dot_za/generic.cpp +++ b/src/core/NEON/kernels/arm_conv/depthwise/kernels/sme2_u8q_planar_5x5_s1_4rows_dot_za/generic.cpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2022-2023 Arm Limited. + * Copyright (c) 2022-2024 Arm Limited. 
* * SPDX-License-Identifier: MIT * diff --git a/src/core/NEON/kernels/arm_conv/depthwise/kernels/sme2_u8q_planar_5x5_s2_4rows_dot_za/generic.cpp b/src/core/NEON/kernels/arm_conv/depthwise/kernels/sme2_u8q_planar_5x5_s2_4rows_dot_za/generic.cpp index 6c144afa77..e658e4b9e6 100644 --- a/src/core/NEON/kernels/arm_conv/depthwise/kernels/sme2_u8q_planar_5x5_s2_4rows_dot_za/generic.cpp +++ b/src/core/NEON/kernels/arm_conv/depthwise/kernels/sme2_u8q_planar_5x5_s2_4rows_dot_za/generic.cpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2022-2023 Arm Limited. + * Copyright (c) 2022-2024 Arm Limited. * * SPDX-License-Identifier: MIT * diff --git a/src/core/NEON/kernels/arm_conv/depthwise/kernels/sme2_u8s8u8q_planar_3x3_s1_4rows_dot_za/generic.cpp b/src/core/NEON/kernels/arm_conv/depthwise/kernels/sme2_u8s8u8q_planar_3x3_s1_4rows_dot_za/generic.cpp index 612beb342a..2b42227022 100644 --- a/src/core/NEON/kernels/arm_conv/depthwise/kernels/sme2_u8s8u8q_planar_3x3_s1_4rows_dot_za/generic.cpp +++ b/src/core/NEON/kernels/arm_conv/depthwise/kernels/sme2_u8s8u8q_planar_3x3_s1_4rows_dot_za/generic.cpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2022-2023 Arm Limited. + * Copyright (c) 2022-2024 Arm Limited. * * SPDX-License-Identifier: MIT * diff --git a/src/core/NEON/kernels/arm_conv/depthwise/kernels/sme2_u8s8u8q_planar_3x3_s2_4rows_dot_za/generic.cpp b/src/core/NEON/kernels/arm_conv/depthwise/kernels/sme2_u8s8u8q_planar_3x3_s2_4rows_dot_za/generic.cpp index 8ce04fb8c2..ac552f6d76 100644 --- a/src/core/NEON/kernels/arm_conv/depthwise/kernels/sme2_u8s8u8q_planar_3x3_s2_4rows_dot_za/generic.cpp +++ b/src/core/NEON/kernels/arm_conv/depthwise/kernels/sme2_u8s8u8q_planar_3x3_s2_4rows_dot_za/generic.cpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2022-2023 Arm Limited. + * Copyright (c) 2022-2024 Arm Limited. 
* * SPDX-License-Identifier: MIT * diff --git a/src/core/NEON/kernels/arm_conv/depthwise/kernels/sme2_u8s8u8q_planar_5x5_s1_4rows_dot_za/generic.cpp b/src/core/NEON/kernels/arm_conv/depthwise/kernels/sme2_u8s8u8q_planar_5x5_s1_4rows_dot_za/generic.cpp index 64023eeaff..da97c06f83 100644 --- a/src/core/NEON/kernels/arm_conv/depthwise/kernels/sme2_u8s8u8q_planar_5x5_s1_4rows_dot_za/generic.cpp +++ b/src/core/NEON/kernels/arm_conv/depthwise/kernels/sme2_u8s8u8q_planar_5x5_s1_4rows_dot_za/generic.cpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2022-2023 Arm Limited. + * Copyright (c) 2022-2024 Arm Limited. * * SPDX-License-Identifier: MIT * diff --git a/src/core/NEON/kernels/arm_conv/depthwise/kernels/sme2_u8s8u8q_planar_5x5_s2_4rows_dot_za/generic.cpp b/src/core/NEON/kernels/arm_conv/depthwise/kernels/sme2_u8s8u8q_planar_5x5_s2_4rows_dot_za/generic.cpp index d8dc69127e..09d9d31479 100644 --- a/src/core/NEON/kernels/arm_conv/depthwise/kernels/sme2_u8s8u8q_planar_5x5_s2_4rows_dot_za/generic.cpp +++ b/src/core/NEON/kernels/arm_conv/depthwise/kernels/sme2_u8s8u8q_planar_5x5_s2_4rows_dot_za/generic.cpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2022-2023 Arm Limited. + * Copyright (c) 2022-2024 Arm Limited. * * SPDX-License-Identifier: MIT * diff --git a/src/core/NEON/kernels/arm_conv/depthwise/kernels/sve_fp16_nhwc_3x3_s1_output2x2_mla_depthfirst/generic_direct.cpp b/src/core/NEON/kernels/arm_conv/depthwise/kernels/sve_fp16_nhwc_3x3_s1_output2x2_mla_depthfirst/generic_direct.cpp index d807856ccb..eb0d83fa4b 100644 --- a/src/core/NEON/kernels/arm_conv/depthwise/kernels/sve_fp16_nhwc_3x3_s1_output2x2_mla_depthfirst/generic_direct.cpp +++ b/src/core/NEON/kernels/arm_conv/depthwise/kernels/sve_fp16_nhwc_3x3_s1_output2x2_mla_depthfirst/generic_direct.cpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2021, 2023 Arm Limited. + * Copyright (c) 2021, 2023-2024 Arm Limited. 
* * SPDX-License-Identifier: MIT * diff --git a/src/core/NEON/kernels/arm_conv/depthwise/kernels/sve_fp16_nhwc_3x3_s1_output2x2_mla_depthfirst/generic_indirect.cpp b/src/core/NEON/kernels/arm_conv/depthwise/kernels/sve_fp16_nhwc_3x3_s1_output2x2_mla_depthfirst/generic_indirect.cpp index 90982b6990..ba5ffd6259 100644 --- a/src/core/NEON/kernels/arm_conv/depthwise/kernels/sve_fp16_nhwc_3x3_s1_output2x2_mla_depthfirst/generic_indirect.cpp +++ b/src/core/NEON/kernels/arm_conv/depthwise/kernels/sve_fp16_nhwc_3x3_s1_output2x2_mla_depthfirst/generic_indirect.cpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2021, 2023 Arm Limited. + * Copyright (c) 2021, 2023-2024 Arm Limited. * * SPDX-License-Identifier: MIT * diff --git a/src/core/NEON/kernels/arm_conv/depthwise/kernels/sve_fp16_nhwc_3x3_s1_output3x3_mla_depthfirst/generic_direct.cpp b/src/core/NEON/kernels/arm_conv/depthwise/kernels/sve_fp16_nhwc_3x3_s1_output3x3_mla_depthfirst/generic_direct.cpp index a22ab39d6f..ea1df55525 100644 --- a/src/core/NEON/kernels/arm_conv/depthwise/kernels/sve_fp16_nhwc_3x3_s1_output3x3_mla_depthfirst/generic_direct.cpp +++ b/src/core/NEON/kernels/arm_conv/depthwise/kernels/sve_fp16_nhwc_3x3_s1_output3x3_mla_depthfirst/generic_direct.cpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2021, 2023 Arm Limited. + * Copyright (c) 2021, 2023-2024 Arm Limited. * * SPDX-License-Identifier: MIT * diff --git a/src/core/NEON/kernels/arm_conv/depthwise/kernels/sve_fp16_nhwc_3x3_s1_output3x3_mla_depthfirst/generic_indirect.cpp b/src/core/NEON/kernels/arm_conv/depthwise/kernels/sve_fp16_nhwc_3x3_s1_output3x3_mla_depthfirst/generic_indirect.cpp index 4f8368acd5..c6a0b5defc 100644 --- a/src/core/NEON/kernels/arm_conv/depthwise/kernels/sve_fp16_nhwc_3x3_s1_output3x3_mla_depthfirst/generic_indirect.cpp +++ b/src/core/NEON/kernels/arm_conv/depthwise/kernels/sve_fp16_nhwc_3x3_s1_output3x3_mla_depthfirst/generic_indirect.cpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2021, 2023 Arm Limited. 
+ * Copyright (c) 2021, 2023-2024 Arm Limited. * * SPDX-License-Identifier: MIT * diff --git a/src/core/NEON/kernels/arm_conv/depthwise/kernels/sve_fp16_nhwc_3x3_s1_output4x4_mla_depthfirst/generic_direct.cpp b/src/core/NEON/kernels/arm_conv/depthwise/kernels/sve_fp16_nhwc_3x3_s1_output4x4_mla_depthfirst/generic_direct.cpp index 41eaa4f18c..6d031ca47e 100644 --- a/src/core/NEON/kernels/arm_conv/depthwise/kernels/sve_fp16_nhwc_3x3_s1_output4x4_mla_depthfirst/generic_direct.cpp +++ b/src/core/NEON/kernels/arm_conv/depthwise/kernels/sve_fp16_nhwc_3x3_s1_output4x4_mla_depthfirst/generic_direct.cpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2021, 2023 Arm Limited. + * Copyright (c) 2021, 2023-2024 Arm Limited. * * SPDX-License-Identifier: MIT * diff --git a/src/core/NEON/kernels/arm_conv/depthwise/kernels/sve_fp16_nhwc_3x3_s1_output4x4_mla_depthfirst/generic_indirect.cpp b/src/core/NEON/kernels/arm_conv/depthwise/kernels/sve_fp16_nhwc_3x3_s1_output4x4_mla_depthfirst/generic_indirect.cpp index c0be293cd7..f87ed9fa29 100644 --- a/src/core/NEON/kernels/arm_conv/depthwise/kernels/sve_fp16_nhwc_3x3_s1_output4x4_mla_depthfirst/generic_indirect.cpp +++ b/src/core/NEON/kernels/arm_conv/depthwise/kernels/sve_fp16_nhwc_3x3_s1_output4x4_mla_depthfirst/generic_indirect.cpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2021, 2023 Arm Limited. + * Copyright (c) 2021, 2023-2024 Arm Limited. * * SPDX-License-Identifier: MIT * diff --git a/src/core/NEON/kernels/arm_conv/depthwise/kernels/sve_fp16_nhwc_3x3_s2_output2x2_mla_depthfirst/generic_direct.cpp b/src/core/NEON/kernels/arm_conv/depthwise/kernels/sve_fp16_nhwc_3x3_s2_output2x2_mla_depthfirst/generic_direct.cpp index 58decdba1c..6543bc8eee 100644 --- a/src/core/NEON/kernels/arm_conv/depthwise/kernels/sve_fp16_nhwc_3x3_s2_output2x2_mla_depthfirst/generic_direct.cpp +++ b/src/core/NEON/kernels/arm_conv/depthwise/kernels/sve_fp16_nhwc_3x3_s2_output2x2_mla_depthfirst/generic_direct.cpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2021, 2023 Arm Limited. 
+ * Copyright (c) 2021, 2023-2024 Arm Limited. * * SPDX-License-Identifier: MIT * diff --git a/src/core/NEON/kernels/arm_conv/depthwise/kernels/sve_fp16_nhwc_3x3_s2_output2x2_mla_depthfirst/generic_indirect.cpp b/src/core/NEON/kernels/arm_conv/depthwise/kernels/sve_fp16_nhwc_3x3_s2_output2x2_mla_depthfirst/generic_indirect.cpp index d5fbb6baee..84943f16c8 100644 --- a/src/core/NEON/kernels/arm_conv/depthwise/kernels/sve_fp16_nhwc_3x3_s2_output2x2_mla_depthfirst/generic_indirect.cpp +++ b/src/core/NEON/kernels/arm_conv/depthwise/kernels/sve_fp16_nhwc_3x3_s2_output2x2_mla_depthfirst/generic_indirect.cpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2021, 2023 Arm Limited. + * Copyright (c) 2021, 2023-2024 Arm Limited. * * SPDX-License-Identifier: MIT * diff --git a/src/core/NEON/kernels/arm_conv/depthwise/kernels/sve_fp16_nhwc_5x5_s1_output2x2_mla_depthfirst/generic_direct.cpp b/src/core/NEON/kernels/arm_conv/depthwise/kernels/sve_fp16_nhwc_5x5_s1_output2x2_mla_depthfirst/generic_direct.cpp index fdbee67926..76024c0198 100644 --- a/src/core/NEON/kernels/arm_conv/depthwise/kernels/sve_fp16_nhwc_5x5_s1_output2x2_mla_depthfirst/generic_direct.cpp +++ b/src/core/NEON/kernels/arm_conv/depthwise/kernels/sve_fp16_nhwc_5x5_s1_output2x2_mla_depthfirst/generic_direct.cpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2021, 2023 Arm Limited. + * Copyright (c) 2021, 2023-2024 Arm Limited. 
* * SPDX-License-Identifier: MIT * diff --git a/src/core/NEON/kernels/arm_conv/depthwise/kernels/sve_fp16_nhwc_5x5_s1_output2x2_mla_depthfirst/generic_indirect.cpp b/src/core/NEON/kernels/arm_conv/depthwise/kernels/sve_fp16_nhwc_5x5_s1_output2x2_mla_depthfirst/generic_indirect.cpp index 1ec0cb2cbf..fc1436d7bc 100644 --- a/src/core/NEON/kernels/arm_conv/depthwise/kernels/sve_fp16_nhwc_5x5_s1_output2x2_mla_depthfirst/generic_indirect.cpp +++ b/src/core/NEON/kernels/arm_conv/depthwise/kernels/sve_fp16_nhwc_5x5_s1_output2x2_mla_depthfirst/generic_indirect.cpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2021, 2023 Arm Limited. + * Copyright (c) 2021, 2023-2024 Arm Limited. * * SPDX-License-Identifier: MIT * diff --git a/src/core/NEON/kernels/arm_conv/depthwise/kernels/sve_fp32_nhwc_3x3_s1_output2x2_mla_depthfirst/generic_direct.cpp b/src/core/NEON/kernels/arm_conv/depthwise/kernels/sve_fp32_nhwc_3x3_s1_output2x2_mla_depthfirst/generic_direct.cpp index 1bdef85274..2c2866c84c 100644 --- a/src/core/NEON/kernels/arm_conv/depthwise/kernels/sve_fp32_nhwc_3x3_s1_output2x2_mla_depthfirst/generic_direct.cpp +++ b/src/core/NEON/kernels/arm_conv/depthwise/kernels/sve_fp32_nhwc_3x3_s1_output2x2_mla_depthfirst/generic_direct.cpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2021, 2023 Arm Limited. + * Copyright (c) 2021, 2023-2024 Arm Limited. * * SPDX-License-Identifier: MIT * diff --git a/src/core/NEON/kernels/arm_conv/depthwise/kernels/sve_fp32_nhwc_3x3_s1_output2x2_mla_depthfirst/generic_indirect.cpp b/src/core/NEON/kernels/arm_conv/depthwise/kernels/sve_fp32_nhwc_3x3_s1_output2x2_mla_depthfirst/generic_indirect.cpp index 873b4736ff..f4462d6e14 100644 --- a/src/core/NEON/kernels/arm_conv/depthwise/kernels/sve_fp32_nhwc_3x3_s1_output2x2_mla_depthfirst/generic_indirect.cpp +++ b/src/core/NEON/kernels/arm_conv/depthwise/kernels/sve_fp32_nhwc_3x3_s1_output2x2_mla_depthfirst/generic_indirect.cpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2021, 2023 Arm Limited. 
+ * Copyright (c) 2021, 2023-2024 Arm Limited. * * SPDX-License-Identifier: MIT * diff --git a/src/core/NEON/kernels/arm_conv/depthwise/kernels/sve_fp32_nhwc_3x3_s1_output3x3_mla_depthfirst/generic_direct.cpp b/src/core/NEON/kernels/arm_conv/depthwise/kernels/sve_fp32_nhwc_3x3_s1_output3x3_mla_depthfirst/generic_direct.cpp index 015d0e63c2..89f08873a5 100644 --- a/src/core/NEON/kernels/arm_conv/depthwise/kernels/sve_fp32_nhwc_3x3_s1_output3x3_mla_depthfirst/generic_direct.cpp +++ b/src/core/NEON/kernels/arm_conv/depthwise/kernels/sve_fp32_nhwc_3x3_s1_output3x3_mla_depthfirst/generic_direct.cpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2021, 2023 Arm Limited. + * Copyright (c) 2021, 2023-2024 Arm Limited. * * SPDX-License-Identifier: MIT * diff --git a/src/core/NEON/kernels/arm_conv/depthwise/kernels/sve_fp32_nhwc_3x3_s1_output3x3_mla_depthfirst/generic_indirect.cpp b/src/core/NEON/kernels/arm_conv/depthwise/kernels/sve_fp32_nhwc_3x3_s1_output3x3_mla_depthfirst/generic_indirect.cpp index 4809b0c45c..124a15f214 100644 --- a/src/core/NEON/kernels/arm_conv/depthwise/kernels/sve_fp32_nhwc_3x3_s1_output3x3_mla_depthfirst/generic_indirect.cpp +++ b/src/core/NEON/kernels/arm_conv/depthwise/kernels/sve_fp32_nhwc_3x3_s1_output3x3_mla_depthfirst/generic_indirect.cpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2021, 2023 Arm Limited. + * Copyright (c) 2021, 2023-2024 Arm Limited. * * SPDX-License-Identifier: MIT * diff --git a/src/core/NEON/kernels/arm_conv/depthwise/kernels/sve_fp32_nhwc_3x3_s1_output4x4_mla_depthfirst/generic_direct.cpp b/src/core/NEON/kernels/arm_conv/depthwise/kernels/sve_fp32_nhwc_3x3_s1_output4x4_mla_depthfirst/generic_direct.cpp index 35445595f8..d8b77a1906 100644 --- a/src/core/NEON/kernels/arm_conv/depthwise/kernels/sve_fp32_nhwc_3x3_s1_output4x4_mla_depthfirst/generic_direct.cpp +++ b/src/core/NEON/kernels/arm_conv/depthwise/kernels/sve_fp32_nhwc_3x3_s1_output4x4_mla_depthfirst/generic_direct.cpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2021, 2023 Arm Limited. 
+ * Copyright (c) 2021, 2023-2024 Arm Limited. * * SPDX-License-Identifier: MIT * diff --git a/src/core/NEON/kernels/arm_conv/depthwise/kernels/sve_fp32_nhwc_3x3_s1_output4x4_mla_depthfirst/generic_indirect.cpp b/src/core/NEON/kernels/arm_conv/depthwise/kernels/sve_fp32_nhwc_3x3_s1_output4x4_mla_depthfirst/generic_indirect.cpp index 3db248924f..b7c306bd16 100644 --- a/src/core/NEON/kernels/arm_conv/depthwise/kernels/sve_fp32_nhwc_3x3_s1_output4x4_mla_depthfirst/generic_indirect.cpp +++ b/src/core/NEON/kernels/arm_conv/depthwise/kernels/sve_fp32_nhwc_3x3_s1_output4x4_mla_depthfirst/generic_indirect.cpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2021, 2023 Arm Limited. + * Copyright (c) 2021, 2023-2024 Arm Limited. * * SPDX-License-Identifier: MIT * diff --git a/src/core/NEON/kernels/arm_conv/depthwise/kernels/sve_fp32_nhwc_3x3_s2_output2x2_mla_depthfirst/generic_direct.cpp b/src/core/NEON/kernels/arm_conv/depthwise/kernels/sve_fp32_nhwc_3x3_s2_output2x2_mla_depthfirst/generic_direct.cpp index e6090fda94..39d88d703d 100644 --- a/src/core/NEON/kernels/arm_conv/depthwise/kernels/sve_fp32_nhwc_3x3_s2_output2x2_mla_depthfirst/generic_direct.cpp +++ b/src/core/NEON/kernels/arm_conv/depthwise/kernels/sve_fp32_nhwc_3x3_s2_output2x2_mla_depthfirst/generic_direct.cpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2021, 2023 Arm Limited. + * Copyright (c) 2021, 2023-2024 Arm Limited. 
* * SPDX-License-Identifier: MIT * diff --git a/src/core/NEON/kernels/arm_conv/depthwise/kernels/sve_fp32_nhwc_3x3_s2_output2x2_mla_depthfirst/generic_indirect.cpp b/src/core/NEON/kernels/arm_conv/depthwise/kernels/sve_fp32_nhwc_3x3_s2_output2x2_mla_depthfirst/generic_indirect.cpp index 98427701fa..7c5f824acf 100644 --- a/src/core/NEON/kernels/arm_conv/depthwise/kernels/sve_fp32_nhwc_3x3_s2_output2x2_mla_depthfirst/generic_indirect.cpp +++ b/src/core/NEON/kernels/arm_conv/depthwise/kernels/sve_fp32_nhwc_3x3_s2_output2x2_mla_depthfirst/generic_indirect.cpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2021, 2023 Arm Limited. + * Copyright (c) 2021, 2023-2024 Arm Limited. * * SPDX-License-Identifier: MIT * diff --git a/src/core/NEON/kernels/arm_conv/depthwise/kernels/sve_fp32_nhwc_5x5_s1_output2x2_mla_depthfirst/generic_direct.cpp b/src/core/NEON/kernels/arm_conv/depthwise/kernels/sve_fp32_nhwc_5x5_s1_output2x2_mla_depthfirst/generic_direct.cpp index 075181a488..7e14b18e1a 100644 --- a/src/core/NEON/kernels/arm_conv/depthwise/kernels/sve_fp32_nhwc_5x5_s1_output2x2_mla_depthfirst/generic_direct.cpp +++ b/src/core/NEON/kernels/arm_conv/depthwise/kernels/sve_fp32_nhwc_5x5_s1_output2x2_mla_depthfirst/generic_direct.cpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2021, 2023 Arm Limited. + * Copyright (c) 2021, 2023-2024 Arm Limited. * * SPDX-License-Identifier: MIT * diff --git a/src/core/NEON/kernels/arm_conv/depthwise/kernels/sve_fp32_nhwc_5x5_s1_output2x2_mla_depthfirst/generic_indirect.cpp b/src/core/NEON/kernels/arm_conv/depthwise/kernels/sve_fp32_nhwc_5x5_s1_output2x2_mla_depthfirst/generic_indirect.cpp index bf65e04d32..4faf381f7c 100644 --- a/src/core/NEON/kernels/arm_conv/depthwise/kernels/sve_fp32_nhwc_5x5_s1_output2x2_mla_depthfirst/generic_indirect.cpp +++ b/src/core/NEON/kernels/arm_conv/depthwise/kernels/sve_fp32_nhwc_5x5_s1_output2x2_mla_depthfirst/generic_indirect.cpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2021, 2023 Arm Limited. 
+ * Copyright (c) 2021, 2023-2024 Arm Limited. * * SPDX-License-Identifier: MIT * diff --git a/src/core/NEON/kernels/arm_conv/depthwise/kernels/sve_fp32_nhwc_generic_output9_mla_depthfirst/generic.cpp b/src/core/NEON/kernels/arm_conv/depthwise/kernels/sve_fp32_nhwc_generic_output9_mla_depthfirst/generic.cpp index d53daaa8a0..1c97dae722 100644 --- a/src/core/NEON/kernels/arm_conv/depthwise/kernels/sve_fp32_nhwc_generic_output9_mla_depthfirst/generic.cpp +++ b/src/core/NEON/kernels/arm_conv/depthwise/kernels/sve_fp32_nhwc_generic_output9_mla_depthfirst/generic.cpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2021, 2023 Arm Limited. + * Copyright (c) 2021, 2023-2024 Arm Limited. * * SPDX-License-Identifier: MIT * diff --git a/src/core/NEON/kernels/arm_conv/depthwise/kernels/sve_fp32_packed_to_nhwc_3x3_s2_with_multiplier_output3x3_mla_depthfirst/generic.cpp b/src/core/NEON/kernels/arm_conv/depthwise/kernels/sve_fp32_packed_to_nhwc_3x3_s2_with_multiplier_output3x3_mla_depthfirst/generic.cpp index 3a71baaf61..3dd666e638 100644 --- a/src/core/NEON/kernels/arm_conv/depthwise/kernels/sve_fp32_packed_to_nhwc_3x3_s2_with_multiplier_output3x3_mla_depthfirst/generic.cpp +++ b/src/core/NEON/kernels/arm_conv/depthwise/kernels/sve_fp32_packed_to_nhwc_3x3_s2_with_multiplier_output3x3_mla_depthfirst/generic.cpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2021, 2023 Arm Limited. + * Copyright (c) 2021, 2023-2024 Arm Limited. 
* * SPDX-License-Identifier: MIT * diff --git a/src/core/NEON/kernels/arm_conv/depthwise/kernels/sve_fp32_packed_to_nhwc_5x5_s1_with_multiplier_output2x4_mla_depthfirst/generic.cpp b/src/core/NEON/kernels/arm_conv/depthwise/kernels/sve_fp32_packed_to_nhwc_5x5_s1_with_multiplier_output2x4_mla_depthfirst/generic.cpp index 84ab4b5035..f3c596909f 100644 --- a/src/core/NEON/kernels/arm_conv/depthwise/kernels/sve_fp32_packed_to_nhwc_5x5_s1_with_multiplier_output2x4_mla_depthfirst/generic.cpp +++ b/src/core/NEON/kernels/arm_conv/depthwise/kernels/sve_fp32_packed_to_nhwc_5x5_s1_with_multiplier_output2x4_mla_depthfirst/generic.cpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2021, 2023 Arm Limited. + * Copyright (c) 2021, 2023-2024 Arm Limited. * * SPDX-License-Identifier: MIT * diff --git a/src/core/NEON/kernels/arm_conv/depthwise/kernels/sve_fp32_packed_to_nhwc_generic_with_multiplier_output2x8_mla_depthfirst/generic.cpp b/src/core/NEON/kernels/arm_conv/depthwise/kernels/sve_fp32_packed_to_nhwc_generic_with_multiplier_output2x8_mla_depthfirst/generic.cpp index 1770ec182c..87c24ce738 100644 --- a/src/core/NEON/kernels/arm_conv/depthwise/kernels/sve_fp32_packed_to_nhwc_generic_with_multiplier_output2x8_mla_depthfirst/generic.cpp +++ b/src/core/NEON/kernels/arm_conv/depthwise/kernels/sve_fp32_packed_to_nhwc_generic_with_multiplier_output2x8_mla_depthfirst/generic.cpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2021, 2023 Arm Limited. + * Copyright (c) 2021, 2023-2024 Arm Limited. 
* * SPDX-License-Identifier: MIT * diff --git a/src/core/NEON/kernels/arm_conv/depthwise/kernels/sve_s8q_nhwc_3x3_s1_output2x2_dot_depthfirst/generic.cpp b/src/core/NEON/kernels/arm_conv/depthwise/kernels/sve_s8q_nhwc_3x3_s1_output2x2_dot_depthfirst/generic.cpp index 0cee302c56..3b56ee0d67 100644 --- a/src/core/NEON/kernels/arm_conv/depthwise/kernels/sve_s8q_nhwc_3x3_s1_output2x2_dot_depthfirst/generic.cpp +++ b/src/core/NEON/kernels/arm_conv/depthwise/kernels/sve_s8q_nhwc_3x3_s1_output2x2_dot_depthfirst/generic.cpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2021-2023 Arm Limited. + * Copyright (c) 2021-2024 Arm Limited. * * SPDX-License-Identifier: MIT * diff --git a/src/core/NEON/kernels/arm_conv/depthwise/kernels/sve_s8q_nhwc_3x3_s1_output2x2_mla_depthfirst/generic.cpp b/src/core/NEON/kernels/arm_conv/depthwise/kernels/sve_s8q_nhwc_3x3_s1_output2x2_mla_depthfirst/generic.cpp index 8ac522dc9a..6de5289a6c 100644 --- a/src/core/NEON/kernels/arm_conv/depthwise/kernels/sve_s8q_nhwc_3x3_s1_output2x2_mla_depthfirst/generic.cpp +++ b/src/core/NEON/kernels/arm_conv/depthwise/kernels/sve_s8q_nhwc_3x3_s1_output2x2_mla_depthfirst/generic.cpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2021-2023 Arm Limited. + * Copyright (c) 2021-2024 Arm Limited. * * SPDX-License-Identifier: MIT * diff --git a/src/core/NEON/kernels/arm_conv/depthwise/kernels/sve_s8q_nhwc_3x3_s2_output2x2_mla_depthfirst/generic.cpp b/src/core/NEON/kernels/arm_conv/depthwise/kernels/sve_s8q_nhwc_3x3_s2_output2x2_mla_depthfirst/generic.cpp index fc9a48bb46..f1881cc1df 100644 --- a/src/core/NEON/kernels/arm_conv/depthwise/kernels/sve_s8q_nhwc_3x3_s2_output2x2_mla_depthfirst/generic.cpp +++ b/src/core/NEON/kernels/arm_conv/depthwise/kernels/sve_s8q_nhwc_3x3_s2_output2x2_mla_depthfirst/generic.cpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2021-2023 Arm Limited. + * Copyright (c) 2021-2024 Arm Limited. 
* * SPDX-License-Identifier: MIT * diff --git a/src/core/NEON/kernels/arm_conv/depthwise/kernels/sve_s8q_nhwc_5x5_s1_output2x2_mla_depthfirst/generic.cpp b/src/core/NEON/kernels/arm_conv/depthwise/kernels/sve_s8q_nhwc_5x5_s1_output2x2_mla_depthfirst/generic.cpp index 7ff724ddd8..5bb90ab0a3 100644 --- a/src/core/NEON/kernels/arm_conv/depthwise/kernels/sve_s8q_nhwc_5x5_s1_output2x2_mla_depthfirst/generic.cpp +++ b/src/core/NEON/kernels/arm_conv/depthwise/kernels/sve_s8q_nhwc_5x5_s1_output2x2_mla_depthfirst/generic.cpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2021-2023 Arm Limited. + * Copyright (c) 2021-2024 Arm Limited. * * SPDX-License-Identifier: MIT * diff --git a/src/core/NEON/kernels/arm_conv/depthwise/kernels/sve_s8q_packed_to_nhwc_3x3_s2_with_multiplier_output2x4_dot_depthfirst/generic.cpp b/src/core/NEON/kernels/arm_conv/depthwise/kernels/sve_s8q_packed_to_nhwc_3x3_s2_with_multiplier_output2x4_dot_depthfirst/generic.cpp index 274b29dcfc..e182b14bc0 100644 --- a/src/core/NEON/kernels/arm_conv/depthwise/kernels/sve_s8q_packed_to_nhwc_3x3_s2_with_multiplier_output2x4_dot_depthfirst/generic.cpp +++ b/src/core/NEON/kernels/arm_conv/depthwise/kernels/sve_s8q_packed_to_nhwc_3x3_s2_with_multiplier_output2x4_dot_depthfirst/generic.cpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2021, 2023 Arm Limited. + * Copyright (c) 2021, 2023-2024 Arm Limited. 
* * SPDX-License-Identifier: MIT * diff --git a/src/core/NEON/kernels/arm_conv/depthwise/kernels/sve_s8q_packed_to_nhwc_5x5_s1_with_multiplier_output4x2_dot_depthfirst/generic.cpp b/src/core/NEON/kernels/arm_conv/depthwise/kernels/sve_s8q_packed_to_nhwc_5x5_s1_with_multiplier_output4x2_dot_depthfirst/generic.cpp index a3b2b429c0..235750b3f9 100644 --- a/src/core/NEON/kernels/arm_conv/depthwise/kernels/sve_s8q_packed_to_nhwc_5x5_s1_with_multiplier_output4x2_dot_depthfirst/generic.cpp +++ b/src/core/NEON/kernels/arm_conv/depthwise/kernels/sve_s8q_packed_to_nhwc_5x5_s1_with_multiplier_output4x2_dot_depthfirst/generic.cpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2021, 2023 Arm Limited. + * Copyright (c) 2021, 2023-2024 Arm Limited. * * SPDX-License-Identifier: MIT * diff --git a/src/core/NEON/kernels/arm_conv/depthwise/kernels/sve_s8qs_nhwc_3x3_s1_output2x2_dot_depthfirst/generic.cpp b/src/core/NEON/kernels/arm_conv/depthwise/kernels/sve_s8qs_nhwc_3x3_s1_output2x2_dot_depthfirst/generic.cpp index d9c8644fc4..d69b460eab 100644 --- a/src/core/NEON/kernels/arm_conv/depthwise/kernels/sve_s8qs_nhwc_3x3_s1_output2x2_dot_depthfirst/generic.cpp +++ b/src/core/NEON/kernels/arm_conv/depthwise/kernels/sve_s8qs_nhwc_3x3_s1_output2x2_dot_depthfirst/generic.cpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2021-2023 Arm Limited. + * Copyright (c) 2021-2024 Arm Limited. * * SPDX-License-Identifier: MIT * diff --git a/src/core/NEON/kernels/arm_conv/depthwise/kernels/sve_u8q_nhwc_3x3_s1_output2x2_dot_depthfirst/generic.cpp b/src/core/NEON/kernels/arm_conv/depthwise/kernels/sve_u8q_nhwc_3x3_s1_output2x2_dot_depthfirst/generic.cpp index f0860c98b9..b30631e38e 100644 --- a/src/core/NEON/kernels/arm_conv/depthwise/kernels/sve_u8q_nhwc_3x3_s1_output2x2_dot_depthfirst/generic.cpp +++ b/src/core/NEON/kernels/arm_conv/depthwise/kernels/sve_u8q_nhwc_3x3_s1_output2x2_dot_depthfirst/generic.cpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2021-2023 Arm Limited. + * Copyright (c) 2021-2024 Arm Limited. 
* * SPDX-License-Identifier: MIT * diff --git a/src/core/NEON/kernels/arm_conv/depthwise/kernels/sve_u8q_nhwc_3x3_s1_output2x2_mla_depthfirst/generic.cpp b/src/core/NEON/kernels/arm_conv/depthwise/kernels/sve_u8q_nhwc_3x3_s1_output2x2_mla_depthfirst/generic.cpp index 5c26010c0d..471fdcc981 100644 --- a/src/core/NEON/kernels/arm_conv/depthwise/kernels/sve_u8q_nhwc_3x3_s1_output2x2_mla_depthfirst/generic.cpp +++ b/src/core/NEON/kernels/arm_conv/depthwise/kernels/sve_u8q_nhwc_3x3_s1_output2x2_mla_depthfirst/generic.cpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2021-2023 Arm Limited. + * Copyright (c) 2021-2024 Arm Limited. * * SPDX-License-Identifier: MIT * diff --git a/src/core/NEON/kernels/arm_conv/depthwise/kernels/sve_u8q_nhwc_3x3_s2_output2x2_mla_depthfirst/generic.cpp b/src/core/NEON/kernels/arm_conv/depthwise/kernels/sve_u8q_nhwc_3x3_s2_output2x2_mla_depthfirst/generic.cpp index 1ea2fcbfbd..b2738c49a5 100644 --- a/src/core/NEON/kernels/arm_conv/depthwise/kernels/sve_u8q_nhwc_3x3_s2_output2x2_mla_depthfirst/generic.cpp +++ b/src/core/NEON/kernels/arm_conv/depthwise/kernels/sve_u8q_nhwc_3x3_s2_output2x2_mla_depthfirst/generic.cpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2021-2023 Arm Limited. + * Copyright (c) 2021-2024 Arm Limited. * * SPDX-License-Identifier: MIT * diff --git a/src/core/NEON/kernels/arm_conv/depthwise/kernels/sve_u8q_nhwc_5x5_s1_output2x2_mla_depthfirst/generic.cpp b/src/core/NEON/kernels/arm_conv/depthwise/kernels/sve_u8q_nhwc_5x5_s1_output2x2_mla_depthfirst/generic.cpp index b8adbb8262..cbcb5123e7 100644 --- a/src/core/NEON/kernels/arm_conv/depthwise/kernels/sve_u8q_nhwc_5x5_s1_output2x2_mla_depthfirst/generic.cpp +++ b/src/core/NEON/kernels/arm_conv/depthwise/kernels/sve_u8q_nhwc_5x5_s1_output2x2_mla_depthfirst/generic.cpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2021-2023 Arm Limited. + * Copyright (c) 2021-2024 Arm Limited. 
* * SPDX-License-Identifier: MIT * diff --git a/src/core/NEON/kernels/arm_conv/depthwise/kernels/sve_u8q_packed_to_nhwc_3x3_s2_with_multiplier_output2x4_dot_depthfirst/generic.cpp b/src/core/NEON/kernels/arm_conv/depthwise/kernels/sve_u8q_packed_to_nhwc_3x3_s2_with_multiplier_output2x4_dot_depthfirst/generic.cpp index a9cd8a7fa9..de13ed6775 100644 --- a/src/core/NEON/kernels/arm_conv/depthwise/kernels/sve_u8q_packed_to_nhwc_3x3_s2_with_multiplier_output2x4_dot_depthfirst/generic.cpp +++ b/src/core/NEON/kernels/arm_conv/depthwise/kernels/sve_u8q_packed_to_nhwc_3x3_s2_with_multiplier_output2x4_dot_depthfirst/generic.cpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2021, 2023 Arm Limited. + * Copyright (c) 2021, 2023-2024 Arm Limited. * * SPDX-License-Identifier: MIT * diff --git a/src/core/NEON/kernels/arm_conv/depthwise/kernels/sve_u8q_packed_to_nhwc_5x5_s1_with_multiplier_output4x2_dot_depthfirst/generic.cpp b/src/core/NEON/kernels/arm_conv/depthwise/kernels/sve_u8q_packed_to_nhwc_5x5_s1_with_multiplier_output4x2_dot_depthfirst/generic.cpp index 4b65a67309..5c3d9c25e8 100644 --- a/src/core/NEON/kernels/arm_conv/depthwise/kernels/sve_u8q_packed_to_nhwc_5x5_s1_with_multiplier_output4x2_dot_depthfirst/generic.cpp +++ b/src/core/NEON/kernels/arm_conv/depthwise/kernels/sve_u8q_packed_to_nhwc_5x5_s1_with_multiplier_output4x2_dot_depthfirst/generic.cpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2021, 2023 Arm Limited. + * Copyright (c) 2021, 2023-2024 Arm Limited. 
* * SPDX-License-Identifier: MIT * diff --git a/src/core/NEON/kernels/arm_conv/depthwise/kernels/sve_u8s8u8q_nhwc_3x3_s1_output2x2_mla_depthfirst/generic.cpp b/src/core/NEON/kernels/arm_conv/depthwise/kernels/sve_u8s8u8q_nhwc_3x3_s1_output2x2_mla_depthfirst/generic.cpp index 887eccf1e9..e38ceff311 100644 --- a/src/core/NEON/kernels/arm_conv/depthwise/kernels/sve_u8s8u8q_nhwc_3x3_s1_output2x2_mla_depthfirst/generic.cpp +++ b/src/core/NEON/kernels/arm_conv/depthwise/kernels/sve_u8s8u8q_nhwc_3x3_s1_output2x2_mla_depthfirst/generic.cpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2021-2023 Arm Limited. + * Copyright (c) 2021-2024 Arm Limited. * * SPDX-License-Identifier: MIT * diff --git a/src/core/NEON/kernels/arm_conv/depthwise/kernels/sve_u8s8u8q_nhwc_3x3_s2_output2x2_mla_depthfirst/generic.cpp b/src/core/NEON/kernels/arm_conv/depthwise/kernels/sve_u8s8u8q_nhwc_3x3_s2_output2x2_mla_depthfirst/generic.cpp index 754d06d443..9dfd8cb63e 100644 --- a/src/core/NEON/kernels/arm_conv/depthwise/kernels/sve_u8s8u8q_nhwc_3x3_s2_output2x2_mla_depthfirst/generic.cpp +++ b/src/core/NEON/kernels/arm_conv/depthwise/kernels/sve_u8s8u8q_nhwc_3x3_s2_output2x2_mla_depthfirst/generic.cpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2021-2023 Arm Limited. + * Copyright (c) 2021-2024 Arm Limited. * * SPDX-License-Identifier: MIT * diff --git a/src/core/NEON/kernels/arm_conv/depthwise/kernels/sve_u8s8u8q_nhwc_5x5_s1_output2x2_mla_depthfirst/generic.cpp b/src/core/NEON/kernels/arm_conv/depthwise/kernels/sve_u8s8u8q_nhwc_5x5_s1_output2x2_mla_depthfirst/generic.cpp index f24a258484..6cb1f4fd3a 100644 --- a/src/core/NEON/kernels/arm_conv/depthwise/kernels/sve_u8s8u8q_nhwc_5x5_s1_output2x2_mla_depthfirst/generic.cpp +++ b/src/core/NEON/kernels/arm_conv/depthwise/kernels/sve_u8s8u8q_nhwc_5x5_s1_output2x2_mla_depthfirst/generic.cpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2021-2023 Arm Limited. + * Copyright (c) 2021-2024 Arm Limited. 
* * SPDX-License-Identifier: MIT * diff --git a/src/core/NEON/kernels/arm_conv/pooling/depthfirst_driver.hpp b/src/core/NEON/kernels/arm_conv/pooling/depthfirst_driver.hpp index d0e8639229..f7e108ac16 100644 --- a/src/core/NEON/kernels/arm_conv/pooling/depthfirst_driver.hpp +++ b/src/core/NEON/kernels/arm_conv/pooling/depthfirst_driver.hpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2022-2023 Arm Limited. + * Copyright (c) 2022-2024 Arm Limited. * * SPDX-License-Identifier: MIT * diff --git a/src/core/NEON/kernels/arm_conv/pooling/kernels/a64_fp16_nhwc_avg_3x3_s1_output2x2_depthfirst/generic.cpp b/src/core/NEON/kernels/arm_conv/pooling/kernels/a64_fp16_nhwc_avg_3x3_s1_output2x2_depthfirst/generic.cpp index 5df848d1dd..c826a4255c 100644 --- a/src/core/NEON/kernels/arm_conv/pooling/kernels/a64_fp16_nhwc_avg_3x3_s1_output2x2_depthfirst/generic.cpp +++ b/src/core/NEON/kernels/arm_conv/pooling/kernels/a64_fp16_nhwc_avg_3x3_s1_output2x2_depthfirst/generic.cpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2021-2023 Arm Limited. + * Copyright (c) 2021-2024 Arm Limited. * * SPDX-License-Identifier: MIT * diff --git a/src/core/NEON/kernels/arm_conv/pooling/kernels/a64_fp16_nhwc_avg_generic_depthfirst/generic.cpp b/src/core/NEON/kernels/arm_conv/pooling/kernels/a64_fp16_nhwc_avg_generic_depthfirst/generic.cpp index f7be92e53f..a28492fe56 100644 --- a/src/core/NEON/kernels/arm_conv/pooling/kernels/a64_fp16_nhwc_avg_generic_depthfirst/generic.cpp +++ b/src/core/NEON/kernels/arm_conv/pooling/kernels/a64_fp16_nhwc_avg_generic_depthfirst/generic.cpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2021-2023 Arm Limited. + * Copyright (c) 2021-2024 Arm Limited. 
* * SPDX-License-Identifier: MIT * diff --git a/src/core/NEON/kernels/arm_conv/pooling/kernels/a64_fp16_nhwc_max_2x2_s1_output2x2_depthfirst/generic.cpp b/src/core/NEON/kernels/arm_conv/pooling/kernels/a64_fp16_nhwc_max_2x2_s1_output2x2_depthfirst/generic.cpp index 4b073b9076..1df19d7905 100644 --- a/src/core/NEON/kernels/arm_conv/pooling/kernels/a64_fp16_nhwc_max_2x2_s1_output2x2_depthfirst/generic.cpp +++ b/src/core/NEON/kernels/arm_conv/pooling/kernels/a64_fp16_nhwc_max_2x2_s1_output2x2_depthfirst/generic.cpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2021-2023 Arm Limited. + * Copyright (c) 2021-2024 Arm Limited. * * SPDX-License-Identifier: MIT * diff --git a/src/core/NEON/kernels/arm_conv/pooling/kernels/a64_fp16_nhwc_max_generic_depthfirst/generic.cpp b/src/core/NEON/kernels/arm_conv/pooling/kernels/a64_fp16_nhwc_max_generic_depthfirst/generic.cpp index c92e2cdebd..c13c20dc70 100644 --- a/src/core/NEON/kernels/arm_conv/pooling/kernels/a64_fp16_nhwc_max_generic_depthfirst/generic.cpp +++ b/src/core/NEON/kernels/arm_conv/pooling/kernels/a64_fp16_nhwc_max_generic_depthfirst/generic.cpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2021-2023 Arm Limited. + * Copyright (c) 2021-2024 Arm Limited. * * SPDX-License-Identifier: MIT * diff --git a/src/core/NEON/kernels/arm_conv/pooling/kernels/a64_fp32_nhwc_avg_3x3_s1_output2x2_depthfirst/generic.cpp b/src/core/NEON/kernels/arm_conv/pooling/kernels/a64_fp32_nhwc_avg_3x3_s1_output2x2_depthfirst/generic.cpp index cf0047638e..243a41527a 100644 --- a/src/core/NEON/kernels/arm_conv/pooling/kernels/a64_fp32_nhwc_avg_3x3_s1_output2x2_depthfirst/generic.cpp +++ b/src/core/NEON/kernels/arm_conv/pooling/kernels/a64_fp32_nhwc_avg_3x3_s1_output2x2_depthfirst/generic.cpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2021-2023 Arm Limited. + * Copyright (c) 2021-2024 Arm Limited. 
* * SPDX-License-Identifier: MIT * diff --git a/src/core/NEON/kernels/arm_conv/pooling/kernels/a64_fp32_nhwc_avg_generic_depthfirst/generic.cpp b/src/core/NEON/kernels/arm_conv/pooling/kernels/a64_fp32_nhwc_avg_generic_depthfirst/generic.cpp index d236f07b1c..7ab432da3b 100644 --- a/src/core/NEON/kernels/arm_conv/pooling/kernels/a64_fp32_nhwc_avg_generic_depthfirst/generic.cpp +++ b/src/core/NEON/kernels/arm_conv/pooling/kernels/a64_fp32_nhwc_avg_generic_depthfirst/generic.cpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2021-2023 Arm Limited. + * Copyright (c) 2021-2024 Arm Limited. * * SPDX-License-Identifier: MIT * diff --git a/src/core/NEON/kernels/arm_conv/pooling/kernels/a64_fp32_nhwc_max_2x2_s1_output2x2_depthfirst/generic.cpp b/src/core/NEON/kernels/arm_conv/pooling/kernels/a64_fp32_nhwc_max_2x2_s1_output2x2_depthfirst/generic.cpp index f4202de1ed..740068e3da 100644 --- a/src/core/NEON/kernels/arm_conv/pooling/kernels/a64_fp32_nhwc_max_2x2_s1_output2x2_depthfirst/generic.cpp +++ b/src/core/NEON/kernels/arm_conv/pooling/kernels/a64_fp32_nhwc_max_2x2_s1_output2x2_depthfirst/generic.cpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2021-2023 Arm Limited. + * Copyright (c) 2021-2024 Arm Limited. * * SPDX-License-Identifier: MIT * diff --git a/src/core/NEON/kernels/arm_conv/pooling/kernels/a64_fp32_nhwc_max_generic_depthfirst/generic.cpp b/src/core/NEON/kernels/arm_conv/pooling/kernels/a64_fp32_nhwc_max_generic_depthfirst/generic.cpp index f4706635dc..25d1b36279 100644 --- a/src/core/NEON/kernels/arm_conv/pooling/kernels/a64_fp32_nhwc_max_generic_depthfirst/generic.cpp +++ b/src/core/NEON/kernels/arm_conv/pooling/kernels/a64_fp32_nhwc_max_generic_depthfirst/generic.cpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2021-2023 Arm Limited. + * Copyright (c) 2021-2024 Arm Limited. 
* * SPDX-License-Identifier: MIT * diff --git a/src/core/NEON/kernels/arm_conv/pooling/kernels/a64_s8_nhwc_avg_generic_depthfirst/generic.cpp b/src/core/NEON/kernels/arm_conv/pooling/kernels/a64_s8_nhwc_avg_generic_depthfirst/generic.cpp index 5d082102b3..d60f0639f7 100644 --- a/src/core/NEON/kernels/arm_conv/pooling/kernels/a64_s8_nhwc_avg_generic_depthfirst/generic.cpp +++ b/src/core/NEON/kernels/arm_conv/pooling/kernels/a64_s8_nhwc_avg_generic_depthfirst/generic.cpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2021-2023 Arm Limited. + * Copyright (c) 2021-2024 Arm Limited. * * SPDX-License-Identifier: MIT * diff --git a/src/core/NEON/kernels/arm_conv/pooling/kernels/a64_s8_nhwc_max_2x2_s1_output2x2_depthfirst/generic.cpp b/src/core/NEON/kernels/arm_conv/pooling/kernels/a64_s8_nhwc_max_2x2_s1_output2x2_depthfirst/generic.cpp index 7e62ac1afc..c6d684d30c 100644 --- a/src/core/NEON/kernels/arm_conv/pooling/kernels/a64_s8_nhwc_max_2x2_s1_output2x2_depthfirst/generic.cpp +++ b/src/core/NEON/kernels/arm_conv/pooling/kernels/a64_s8_nhwc_max_2x2_s1_output2x2_depthfirst/generic.cpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2021-2023 Arm Limited. + * Copyright (c) 2021-2024 Arm Limited. * * SPDX-License-Identifier: MIT * diff --git a/src/core/NEON/kernels/arm_conv/pooling/kernels/a64_s8_nhwc_max_generic_depthfirst/generic.cpp b/src/core/NEON/kernels/arm_conv/pooling/kernels/a64_s8_nhwc_max_generic_depthfirst/generic.cpp index 411fd11460..a5a4adce46 100644 --- a/src/core/NEON/kernels/arm_conv/pooling/kernels/a64_s8_nhwc_max_generic_depthfirst/generic.cpp +++ b/src/core/NEON/kernels/arm_conv/pooling/kernels/a64_s8_nhwc_max_generic_depthfirst/generic.cpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2021-2023 Arm Limited. + * Copyright (c) 2021-2024 Arm Limited. 
* * SPDX-License-Identifier: MIT * diff --git a/src/core/NEON/kernels/arm_conv/pooling/kernels/a64_s8q_nhwc_avg_generic_depthfirst/generic.cpp b/src/core/NEON/kernels/arm_conv/pooling/kernels/a64_s8q_nhwc_avg_generic_depthfirst/generic.cpp index 019f402911..2c517695c4 100644 --- a/src/core/NEON/kernels/arm_conv/pooling/kernels/a64_s8q_nhwc_avg_generic_depthfirst/generic.cpp +++ b/src/core/NEON/kernels/arm_conv/pooling/kernels/a64_s8q_nhwc_avg_generic_depthfirst/generic.cpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2021-2023 Arm Limited. + * Copyright (c) 2021-2024 Arm Limited. * * SPDX-License-Identifier: MIT * diff --git a/src/core/NEON/kernels/arm_conv/pooling/kernels/a64_s8q_nhwc_max_generic_depthfirst/generic.cpp b/src/core/NEON/kernels/arm_conv/pooling/kernels/a64_s8q_nhwc_max_generic_depthfirst/generic.cpp index f7b8dc761c..bee638fc25 100644 --- a/src/core/NEON/kernels/arm_conv/pooling/kernels/a64_s8q_nhwc_max_generic_depthfirst/generic.cpp +++ b/src/core/NEON/kernels/arm_conv/pooling/kernels/a64_s8q_nhwc_max_generic_depthfirst/generic.cpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2021-2023 Arm Limited. + * Copyright (c) 2021-2024 Arm Limited. * * SPDX-License-Identifier: MIT * diff --git a/src/core/NEON/kernels/arm_conv/pooling/kernels/a64_u8_nhwc_avg_generic_depthfirst/generic.cpp b/src/core/NEON/kernels/arm_conv/pooling/kernels/a64_u8_nhwc_avg_generic_depthfirst/generic.cpp index f8984c451c..d6f9399620 100644 --- a/src/core/NEON/kernels/arm_conv/pooling/kernels/a64_u8_nhwc_avg_generic_depthfirst/generic.cpp +++ b/src/core/NEON/kernels/arm_conv/pooling/kernels/a64_u8_nhwc_avg_generic_depthfirst/generic.cpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2021-2023 Arm Limited. + * Copyright (c) 2021-2024 Arm Limited. 
* * SPDX-License-Identifier: MIT * diff --git a/src/core/NEON/kernels/arm_conv/pooling/kernels/a64_u8_nhwc_max_2x2_s1_output2x2_depthfirst/generic.cpp b/src/core/NEON/kernels/arm_conv/pooling/kernels/a64_u8_nhwc_max_2x2_s1_output2x2_depthfirst/generic.cpp index 66cdb7f849..9dcdf0ab0f 100644 --- a/src/core/NEON/kernels/arm_conv/pooling/kernels/a64_u8_nhwc_max_2x2_s1_output2x2_depthfirst/generic.cpp +++ b/src/core/NEON/kernels/arm_conv/pooling/kernels/a64_u8_nhwc_max_2x2_s1_output2x2_depthfirst/generic.cpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2021-2023 Arm Limited. + * Copyright (c) 2021-2024 Arm Limited. * * SPDX-License-Identifier: MIT * diff --git a/src/core/NEON/kernels/arm_conv/pooling/kernels/a64_u8_nhwc_max_generic_depthfirst/generic.cpp b/src/core/NEON/kernels/arm_conv/pooling/kernels/a64_u8_nhwc_max_generic_depthfirst/generic.cpp index 2ceef125ca..b4b6a70b71 100644 --- a/src/core/NEON/kernels/arm_conv/pooling/kernels/a64_u8_nhwc_max_generic_depthfirst/generic.cpp +++ b/src/core/NEON/kernels/arm_conv/pooling/kernels/a64_u8_nhwc_max_generic_depthfirst/generic.cpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2021-2023 Arm Limited. + * Copyright (c) 2021-2024 Arm Limited. * * SPDX-License-Identifier: MIT * diff --git a/src/core/NEON/kernels/arm_conv/pooling/kernels/a64_u8q_nhwc_avg_generic_depthfirst/generic.cpp b/src/core/NEON/kernels/arm_conv/pooling/kernels/a64_u8q_nhwc_avg_generic_depthfirst/generic.cpp index 31a3489e5c..8837c8ddac 100644 --- a/src/core/NEON/kernels/arm_conv/pooling/kernels/a64_u8q_nhwc_avg_generic_depthfirst/generic.cpp +++ b/src/core/NEON/kernels/arm_conv/pooling/kernels/a64_u8q_nhwc_avg_generic_depthfirst/generic.cpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2021-2023 Arm Limited. + * Copyright (c) 2021-2024 Arm Limited. 
* * SPDX-License-Identifier: MIT * diff --git a/src/core/NEON/kernels/arm_conv/pooling/kernels/a64_u8q_nhwc_max_generic_depthfirst/generic.cpp b/src/core/NEON/kernels/arm_conv/pooling/kernels/a64_u8q_nhwc_max_generic_depthfirst/generic.cpp index f4927c5536..c6636afd3d 100644 --- a/src/core/NEON/kernels/arm_conv/pooling/kernels/a64_u8q_nhwc_max_generic_depthfirst/generic.cpp +++ b/src/core/NEON/kernels/arm_conv/pooling/kernels/a64_u8q_nhwc_max_generic_depthfirst/generic.cpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2021-2023 Arm Limited. + * Copyright (c) 2021-2024 Arm Limited. * * SPDX-License-Identifier: MIT * diff --git a/src/core/NEON/kernels/arm_conv/pooling/kernels/sme_fp16_nhwc_avg_3x3_s1_output2x2_depthfirst/generic.cpp b/src/core/NEON/kernels/arm_conv/pooling/kernels/sme_fp16_nhwc_avg_3x3_s1_output2x2_depthfirst/generic.cpp index 67b07205cd..e9ea4e96b5 100644 --- a/src/core/NEON/kernels/arm_conv/pooling/kernels/sme_fp16_nhwc_avg_3x3_s1_output2x2_depthfirst/generic.cpp +++ b/src/core/NEON/kernels/arm_conv/pooling/kernels/sme_fp16_nhwc_avg_3x3_s1_output2x2_depthfirst/generic.cpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2022-2023 Arm Limited. + * Copyright (c) 2022-2024 Arm Limited. * * SPDX-License-Identifier: MIT * diff --git a/src/core/NEON/kernels/arm_conv/pooling/kernels/sme_fp16_nhwc_avg_generic_depthfirst/generic.cpp b/src/core/NEON/kernels/arm_conv/pooling/kernels/sme_fp16_nhwc_avg_generic_depthfirst/generic.cpp index 60f17b7bc2..ed0bac8cf4 100644 --- a/src/core/NEON/kernels/arm_conv/pooling/kernels/sme_fp16_nhwc_avg_generic_depthfirst/generic.cpp +++ b/src/core/NEON/kernels/arm_conv/pooling/kernels/sme_fp16_nhwc_avg_generic_depthfirst/generic.cpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2022-2023 Arm Limited. + * Copyright (c) 2022-2024 Arm Limited. 
* * SPDX-License-Identifier: MIT * diff --git a/src/core/NEON/kernels/arm_conv/pooling/kernels/sme_fp16_nhwc_max_2x2_s1_output2x2_depthfirst/generic.cpp b/src/core/NEON/kernels/arm_conv/pooling/kernels/sme_fp16_nhwc_max_2x2_s1_output2x2_depthfirst/generic.cpp index 7fc776ed4e..f497e5292b 100644 --- a/src/core/NEON/kernels/arm_conv/pooling/kernels/sme_fp16_nhwc_max_2x2_s1_output2x2_depthfirst/generic.cpp +++ b/src/core/NEON/kernels/arm_conv/pooling/kernels/sme_fp16_nhwc_max_2x2_s1_output2x2_depthfirst/generic.cpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2022-2023 Arm Limited. + * Copyright (c) 2022-2024 Arm Limited. * * SPDX-License-Identifier: MIT * diff --git a/src/core/NEON/kernels/arm_conv/pooling/kernels/sme_fp16_nhwc_max_generic_depthfirst/generic.cpp b/src/core/NEON/kernels/arm_conv/pooling/kernels/sme_fp16_nhwc_max_generic_depthfirst/generic.cpp index afa2ccbd71..7ba0ac03c7 100644 --- a/src/core/NEON/kernels/arm_conv/pooling/kernels/sme_fp16_nhwc_max_generic_depthfirst/generic.cpp +++ b/src/core/NEON/kernels/arm_conv/pooling/kernels/sme_fp16_nhwc_max_generic_depthfirst/generic.cpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2022-2023 Arm Limited. + * Copyright (c) 2022-2024 Arm Limited. * * SPDX-License-Identifier: MIT * diff --git a/src/core/NEON/kernels/arm_conv/pooling/kernels/sme_fp32_nhwc_avg_3x3_s1_output2x2_depthfirst/generic.cpp b/src/core/NEON/kernels/arm_conv/pooling/kernels/sme_fp32_nhwc_avg_3x3_s1_output2x2_depthfirst/generic.cpp index 8c8532827a..18a96c7f2f 100644 --- a/src/core/NEON/kernels/arm_conv/pooling/kernels/sme_fp32_nhwc_avg_3x3_s1_output2x2_depthfirst/generic.cpp +++ b/src/core/NEON/kernels/arm_conv/pooling/kernels/sme_fp32_nhwc_avg_3x3_s1_output2x2_depthfirst/generic.cpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2022-2023 Arm Limited. + * Copyright (c) 2022-2024 Arm Limited. 
* * SPDX-License-Identifier: MIT * diff --git a/src/core/NEON/kernels/arm_conv/pooling/kernels/sme_fp32_nhwc_avg_generic_depthfirst/generic.cpp b/src/core/NEON/kernels/arm_conv/pooling/kernels/sme_fp32_nhwc_avg_generic_depthfirst/generic.cpp index 86e7f84542..5c7221f905 100644 --- a/src/core/NEON/kernels/arm_conv/pooling/kernels/sme_fp32_nhwc_avg_generic_depthfirst/generic.cpp +++ b/src/core/NEON/kernels/arm_conv/pooling/kernels/sme_fp32_nhwc_avg_generic_depthfirst/generic.cpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2022-2023 Arm Limited. + * Copyright (c) 2022-2024 Arm Limited. * * SPDX-License-Identifier: MIT * diff --git a/src/core/NEON/kernels/arm_conv/pooling/kernels/sme_fp32_nhwc_max_2x2_s1_output2x2_depthfirst/generic.cpp b/src/core/NEON/kernels/arm_conv/pooling/kernels/sme_fp32_nhwc_max_2x2_s1_output2x2_depthfirst/generic.cpp index 3c7213a498..c60111b8e6 100644 --- a/src/core/NEON/kernels/arm_conv/pooling/kernels/sme_fp32_nhwc_max_2x2_s1_output2x2_depthfirst/generic.cpp +++ b/src/core/NEON/kernels/arm_conv/pooling/kernels/sme_fp32_nhwc_max_2x2_s1_output2x2_depthfirst/generic.cpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2022-2023 Arm Limited. + * Copyright (c) 2022-2024 Arm Limited. * * SPDX-License-Identifier: MIT * diff --git a/src/core/NEON/kernels/arm_conv/pooling/kernels/sme_fp32_nhwc_max_generic_depthfirst/generic.cpp b/src/core/NEON/kernels/arm_conv/pooling/kernels/sme_fp32_nhwc_max_generic_depthfirst/generic.cpp index 0dabc2f292..a57252c0c9 100644 --- a/src/core/NEON/kernels/arm_conv/pooling/kernels/sme_fp32_nhwc_max_generic_depthfirst/generic.cpp +++ b/src/core/NEON/kernels/arm_conv/pooling/kernels/sme_fp32_nhwc_max_generic_depthfirst/generic.cpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2022-2023 Arm Limited. + * Copyright (c) 2022-2024 Arm Limited. 
* * SPDX-License-Identifier: MIT * diff --git a/src/core/NEON/kernels/arm_conv/pooling/kernels/sme_s8_nhwc_avg_generic_depthfirst/generic.cpp b/src/core/NEON/kernels/arm_conv/pooling/kernels/sme_s8_nhwc_avg_generic_depthfirst/generic.cpp index c24e977dc6..c4b88b87aa 100644 --- a/src/core/NEON/kernels/arm_conv/pooling/kernels/sme_s8_nhwc_avg_generic_depthfirst/generic.cpp +++ b/src/core/NEON/kernels/arm_conv/pooling/kernels/sme_s8_nhwc_avg_generic_depthfirst/generic.cpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2022-2023 Arm Limited. + * Copyright (c) 2022-2024 Arm Limited. * * SPDX-License-Identifier: MIT * diff --git a/src/core/NEON/kernels/arm_conv/pooling/kernels/sme_s8_nhwc_max_2x2_s1_output2x2_depthfirst/generic.cpp b/src/core/NEON/kernels/arm_conv/pooling/kernels/sme_s8_nhwc_max_2x2_s1_output2x2_depthfirst/generic.cpp index 96617566a8..97f88b278e 100644 --- a/src/core/NEON/kernels/arm_conv/pooling/kernels/sme_s8_nhwc_max_2x2_s1_output2x2_depthfirst/generic.cpp +++ b/src/core/NEON/kernels/arm_conv/pooling/kernels/sme_s8_nhwc_max_2x2_s1_output2x2_depthfirst/generic.cpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2022-2023 Arm Limited. + * Copyright (c) 2022-2024 Arm Limited. * * SPDX-License-Identifier: MIT * diff --git a/src/core/NEON/kernels/arm_conv/pooling/kernels/sme_s8_nhwc_max_generic_depthfirst/generic.cpp b/src/core/NEON/kernels/arm_conv/pooling/kernels/sme_s8_nhwc_max_generic_depthfirst/generic.cpp index d2b45cd353..71b14cc9d8 100644 --- a/src/core/NEON/kernels/arm_conv/pooling/kernels/sme_s8_nhwc_max_generic_depthfirst/generic.cpp +++ b/src/core/NEON/kernels/arm_conv/pooling/kernels/sme_s8_nhwc_max_generic_depthfirst/generic.cpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2022-2023 Arm Limited. + * Copyright (c) 2022-2024 Arm Limited. 
* * SPDX-License-Identifier: MIT * diff --git a/src/core/NEON/kernels/arm_conv/pooling/kernels/sme_s8q_nhwc_avg_generic_depthfirst/generic.cpp b/src/core/NEON/kernels/arm_conv/pooling/kernels/sme_s8q_nhwc_avg_generic_depthfirst/generic.cpp index 91f2f7ab31..6d13a5f0e7 100644 --- a/src/core/NEON/kernels/arm_conv/pooling/kernels/sme_s8q_nhwc_avg_generic_depthfirst/generic.cpp +++ b/src/core/NEON/kernels/arm_conv/pooling/kernels/sme_s8q_nhwc_avg_generic_depthfirst/generic.cpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2022-2023 Arm Limited. + * Copyright (c) 2022-2024 Arm Limited. * * SPDX-License-Identifier: MIT * diff --git a/src/core/NEON/kernels/arm_conv/pooling/kernels/sme_s8q_nhwc_max_generic_depthfirst/generic.cpp b/src/core/NEON/kernels/arm_conv/pooling/kernels/sme_s8q_nhwc_max_generic_depthfirst/generic.cpp index e9b586f4ce..7ec177c9e5 100644 --- a/src/core/NEON/kernels/arm_conv/pooling/kernels/sme_s8q_nhwc_max_generic_depthfirst/generic.cpp +++ b/src/core/NEON/kernels/arm_conv/pooling/kernels/sme_s8q_nhwc_max_generic_depthfirst/generic.cpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2022-2023 Arm Limited. + * Copyright (c) 2022-2024 Arm Limited. * * SPDX-License-Identifier: MIT * diff --git a/src/core/NEON/kernels/arm_conv/pooling/kernels/sme_u8_nhwc_avg_generic_depthfirst/generic.cpp b/src/core/NEON/kernels/arm_conv/pooling/kernels/sme_u8_nhwc_avg_generic_depthfirst/generic.cpp index f0e7bbf5cc..a1f03d885f 100644 --- a/src/core/NEON/kernels/arm_conv/pooling/kernels/sme_u8_nhwc_avg_generic_depthfirst/generic.cpp +++ b/src/core/NEON/kernels/arm_conv/pooling/kernels/sme_u8_nhwc_avg_generic_depthfirst/generic.cpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2022-2023 Arm Limited. + * Copyright (c) 2022-2024 Arm Limited. 
* * SPDX-License-Identifier: MIT * diff --git a/src/core/NEON/kernels/arm_conv/pooling/kernels/sme_u8_nhwc_max_2x2_s1_output2x2_depthfirst/generic.cpp b/src/core/NEON/kernels/arm_conv/pooling/kernels/sme_u8_nhwc_max_2x2_s1_output2x2_depthfirst/generic.cpp index 9088cbde89..c4bb0f2d7d 100644 --- a/src/core/NEON/kernels/arm_conv/pooling/kernels/sme_u8_nhwc_max_2x2_s1_output2x2_depthfirst/generic.cpp +++ b/src/core/NEON/kernels/arm_conv/pooling/kernels/sme_u8_nhwc_max_2x2_s1_output2x2_depthfirst/generic.cpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2022-2023 Arm Limited. + * Copyright (c) 2022-2024 Arm Limited. * * SPDX-License-Identifier: MIT * diff --git a/src/core/NEON/kernels/arm_conv/pooling/kernels/sme_u8_nhwc_max_generic_depthfirst/generic.cpp b/src/core/NEON/kernels/arm_conv/pooling/kernels/sme_u8_nhwc_max_generic_depthfirst/generic.cpp index 06f13e8111..be5b45f21f 100644 --- a/src/core/NEON/kernels/arm_conv/pooling/kernels/sme_u8_nhwc_max_generic_depthfirst/generic.cpp +++ b/src/core/NEON/kernels/arm_conv/pooling/kernels/sme_u8_nhwc_max_generic_depthfirst/generic.cpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2022-2023 Arm Limited. + * Copyright (c) 2022-2024 Arm Limited. * * SPDX-License-Identifier: MIT * diff --git a/src/core/NEON/kernels/arm_conv/pooling/kernels/sme_u8q_nhwc_avg_generic_depthfirst/generic.cpp b/src/core/NEON/kernels/arm_conv/pooling/kernels/sme_u8q_nhwc_avg_generic_depthfirst/generic.cpp index 52c52ccdb9..6ad693547b 100644 --- a/src/core/NEON/kernels/arm_conv/pooling/kernels/sme_u8q_nhwc_avg_generic_depthfirst/generic.cpp +++ b/src/core/NEON/kernels/arm_conv/pooling/kernels/sme_u8q_nhwc_avg_generic_depthfirst/generic.cpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2022-2023 Arm Limited. + * Copyright (c) 2022-2024 Arm Limited. 
* * SPDX-License-Identifier: MIT * diff --git a/src/core/NEON/kernels/arm_conv/pooling/kernels/sme_u8q_nhwc_max_generic_depthfirst/generic.cpp b/src/core/NEON/kernels/arm_conv/pooling/kernels/sme_u8q_nhwc_max_generic_depthfirst/generic.cpp index c8e8e7d399..e569c6a8d9 100644 --- a/src/core/NEON/kernels/arm_conv/pooling/kernels/sme_u8q_nhwc_max_generic_depthfirst/generic.cpp +++ b/src/core/NEON/kernels/arm_conv/pooling/kernels/sme_u8q_nhwc_max_generic_depthfirst/generic.cpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2022-2023 Arm Limited. + * Copyright (c) 2022-2024 Arm Limited. * * SPDX-License-Identifier: MIT * diff --git a/src/core/NEON/kernels/arm_conv/pooling/kernels/sve_fp16_nhwc_avg_3x3_s1_output2x2_depthfirst/generic.cpp b/src/core/NEON/kernels/arm_conv/pooling/kernels/sve_fp16_nhwc_avg_3x3_s1_output2x2_depthfirst/generic.cpp index 1ba78f3fba..1ac43363b7 100644 --- a/src/core/NEON/kernels/arm_conv/pooling/kernels/sve_fp16_nhwc_avg_3x3_s1_output2x2_depthfirst/generic.cpp +++ b/src/core/NEON/kernels/arm_conv/pooling/kernels/sve_fp16_nhwc_avg_3x3_s1_output2x2_depthfirst/generic.cpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2021-2023 Arm Limited. + * Copyright (c) 2021-2024 Arm Limited. * * SPDX-License-Identifier: MIT * diff --git a/src/core/NEON/kernels/arm_conv/pooling/kernels/sve_fp16_nhwc_avg_generic_depthfirst/generic.cpp b/src/core/NEON/kernels/arm_conv/pooling/kernels/sve_fp16_nhwc_avg_generic_depthfirst/generic.cpp index 2bef44ea5c..24b19d4564 100644 --- a/src/core/NEON/kernels/arm_conv/pooling/kernels/sve_fp16_nhwc_avg_generic_depthfirst/generic.cpp +++ b/src/core/NEON/kernels/arm_conv/pooling/kernels/sve_fp16_nhwc_avg_generic_depthfirst/generic.cpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2021-2023 Arm Limited. + * Copyright (c) 2021-2024 Arm Limited. 
* * SPDX-License-Identifier: MIT * diff --git a/src/core/NEON/kernels/arm_conv/pooling/kernels/sve_fp16_nhwc_max_2x2_s1_output2x2_depthfirst/generic.cpp b/src/core/NEON/kernels/arm_conv/pooling/kernels/sve_fp16_nhwc_max_2x2_s1_output2x2_depthfirst/generic.cpp index 31bbfd085e..c2cc40e8b3 100644 --- a/src/core/NEON/kernels/arm_conv/pooling/kernels/sve_fp16_nhwc_max_2x2_s1_output2x2_depthfirst/generic.cpp +++ b/src/core/NEON/kernels/arm_conv/pooling/kernels/sve_fp16_nhwc_max_2x2_s1_output2x2_depthfirst/generic.cpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2021-2023 Arm Limited. + * Copyright (c) 2021-2024 Arm Limited. * * SPDX-License-Identifier: MIT * diff --git a/src/core/NEON/kernels/arm_conv/pooling/kernels/sve_fp16_nhwc_max_generic_depthfirst/generic.cpp b/src/core/NEON/kernels/arm_conv/pooling/kernels/sve_fp16_nhwc_max_generic_depthfirst/generic.cpp index 1a01412836..d5184372e1 100644 --- a/src/core/NEON/kernels/arm_conv/pooling/kernels/sve_fp16_nhwc_max_generic_depthfirst/generic.cpp +++ b/src/core/NEON/kernels/arm_conv/pooling/kernels/sve_fp16_nhwc_max_generic_depthfirst/generic.cpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2021-2023 Arm Limited. + * Copyright (c) 2021-2024 Arm Limited. * * SPDX-License-Identifier: MIT * diff --git a/src/core/NEON/kernels/arm_conv/pooling/kernels/sve_fp32_nhwc_avg_3x3_s1_output2x2_depthfirst/generic.cpp b/src/core/NEON/kernels/arm_conv/pooling/kernels/sve_fp32_nhwc_avg_3x3_s1_output2x2_depthfirst/generic.cpp index c5ea5adea0..510288a477 100644 --- a/src/core/NEON/kernels/arm_conv/pooling/kernels/sve_fp32_nhwc_avg_3x3_s1_output2x2_depthfirst/generic.cpp +++ b/src/core/NEON/kernels/arm_conv/pooling/kernels/sve_fp32_nhwc_avg_3x3_s1_output2x2_depthfirst/generic.cpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2021-2023 Arm Limited. + * Copyright (c) 2021-2024 Arm Limited. 
* * SPDX-License-Identifier: MIT * diff --git a/src/core/NEON/kernels/arm_conv/pooling/kernels/sve_fp32_nhwc_avg_generic_depthfirst/generic.cpp b/src/core/NEON/kernels/arm_conv/pooling/kernels/sve_fp32_nhwc_avg_generic_depthfirst/generic.cpp index 7c94894892..dae8e75d74 100644 --- a/src/core/NEON/kernels/arm_conv/pooling/kernels/sve_fp32_nhwc_avg_generic_depthfirst/generic.cpp +++ b/src/core/NEON/kernels/arm_conv/pooling/kernels/sve_fp32_nhwc_avg_generic_depthfirst/generic.cpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2021-2023 Arm Limited. + * Copyright (c) 2021-2024 Arm Limited. * * SPDX-License-Identifier: MIT * diff --git a/src/core/NEON/kernels/arm_conv/pooling/kernels/sve_fp32_nhwc_max_2x2_s1_output2x2_depthfirst/generic.cpp b/src/core/NEON/kernels/arm_conv/pooling/kernels/sve_fp32_nhwc_max_2x2_s1_output2x2_depthfirst/generic.cpp index d9cebd1363..d9a49a1743 100644 --- a/src/core/NEON/kernels/arm_conv/pooling/kernels/sve_fp32_nhwc_max_2x2_s1_output2x2_depthfirst/generic.cpp +++ b/src/core/NEON/kernels/arm_conv/pooling/kernels/sve_fp32_nhwc_max_2x2_s1_output2x2_depthfirst/generic.cpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2021-2023 Arm Limited. + * Copyright (c) 2021-2024 Arm Limited. * * SPDX-License-Identifier: MIT * diff --git a/src/core/NEON/kernels/arm_conv/pooling/kernels/sve_fp32_nhwc_max_generic_depthfirst/generic.cpp b/src/core/NEON/kernels/arm_conv/pooling/kernels/sve_fp32_nhwc_max_generic_depthfirst/generic.cpp index 87fc75adda..e1d842c9ed 100644 --- a/src/core/NEON/kernels/arm_conv/pooling/kernels/sve_fp32_nhwc_max_generic_depthfirst/generic.cpp +++ b/src/core/NEON/kernels/arm_conv/pooling/kernels/sve_fp32_nhwc_max_generic_depthfirst/generic.cpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2021-2023 Arm Limited. + * Copyright (c) 2021-2024 Arm Limited. 
* * SPDX-License-Identifier: MIT * diff --git a/src/core/NEON/kernels/arm_conv/pooling/kernels/sve_s8_nhwc_avg_generic_depthfirst/generic.cpp b/src/core/NEON/kernels/arm_conv/pooling/kernels/sve_s8_nhwc_avg_generic_depthfirst/generic.cpp index 7925905e64..7647f1a125 100644 --- a/src/core/NEON/kernels/arm_conv/pooling/kernels/sve_s8_nhwc_avg_generic_depthfirst/generic.cpp +++ b/src/core/NEON/kernels/arm_conv/pooling/kernels/sve_s8_nhwc_avg_generic_depthfirst/generic.cpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2021-2023 Arm Limited. + * Copyright (c) 2021-2024 Arm Limited. * * SPDX-License-Identifier: MIT * diff --git a/src/core/NEON/kernels/arm_conv/pooling/kernels/sve_s8_nhwc_max_2x2_s1_output2x2_depthfirst/generic.cpp b/src/core/NEON/kernels/arm_conv/pooling/kernels/sve_s8_nhwc_max_2x2_s1_output2x2_depthfirst/generic.cpp index 5681cc1f3d..d8496b39ae 100644 --- a/src/core/NEON/kernels/arm_conv/pooling/kernels/sve_s8_nhwc_max_2x2_s1_output2x2_depthfirst/generic.cpp +++ b/src/core/NEON/kernels/arm_conv/pooling/kernels/sve_s8_nhwc_max_2x2_s1_output2x2_depthfirst/generic.cpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2021-2023 Arm Limited. + * Copyright (c) 2021-2024 Arm Limited. * * SPDX-License-Identifier: MIT * diff --git a/src/core/NEON/kernels/arm_conv/pooling/kernels/sve_s8_nhwc_max_generic_depthfirst/generic.cpp b/src/core/NEON/kernels/arm_conv/pooling/kernels/sve_s8_nhwc_max_generic_depthfirst/generic.cpp index da9e1408f9..a2018d209a 100644 --- a/src/core/NEON/kernels/arm_conv/pooling/kernels/sve_s8_nhwc_max_generic_depthfirst/generic.cpp +++ b/src/core/NEON/kernels/arm_conv/pooling/kernels/sve_s8_nhwc_max_generic_depthfirst/generic.cpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2021-2023 Arm Limited. + * Copyright (c) 2021-2024 Arm Limited. 
* * SPDX-License-Identifier: MIT * diff --git a/src/core/NEON/kernels/arm_conv/pooling/kernels/sve_s8q_nhwc_avg_generic_depthfirst/generic.cpp b/src/core/NEON/kernels/arm_conv/pooling/kernels/sve_s8q_nhwc_avg_generic_depthfirst/generic.cpp index 19a3b112ad..2e5a6042a3 100644 --- a/src/core/NEON/kernels/arm_conv/pooling/kernels/sve_s8q_nhwc_avg_generic_depthfirst/generic.cpp +++ b/src/core/NEON/kernels/arm_conv/pooling/kernels/sve_s8q_nhwc_avg_generic_depthfirst/generic.cpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2021-2023 Arm Limited. + * Copyright (c) 2021-2024 Arm Limited. * * SPDX-License-Identifier: MIT * diff --git a/src/core/NEON/kernels/arm_conv/pooling/kernels/sve_s8q_nhwc_max_generic_depthfirst/generic.cpp b/src/core/NEON/kernels/arm_conv/pooling/kernels/sve_s8q_nhwc_max_generic_depthfirst/generic.cpp index 4fc1532d5a..923d5e7487 100644 --- a/src/core/NEON/kernels/arm_conv/pooling/kernels/sve_s8q_nhwc_max_generic_depthfirst/generic.cpp +++ b/src/core/NEON/kernels/arm_conv/pooling/kernels/sve_s8q_nhwc_max_generic_depthfirst/generic.cpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2021-2023 Arm Limited. + * Copyright (c) 2021-2024 Arm Limited. * * SPDX-License-Identifier: MIT * diff --git a/src/core/NEON/kernels/arm_conv/pooling/kernels/sve_u8_nhwc_avg_generic_depthfirst/generic.cpp b/src/core/NEON/kernels/arm_conv/pooling/kernels/sve_u8_nhwc_avg_generic_depthfirst/generic.cpp index f3f4950a1f..93db6b1044 100644 --- a/src/core/NEON/kernels/arm_conv/pooling/kernels/sve_u8_nhwc_avg_generic_depthfirst/generic.cpp +++ b/src/core/NEON/kernels/arm_conv/pooling/kernels/sve_u8_nhwc_avg_generic_depthfirst/generic.cpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2021-2023 Arm Limited. + * Copyright (c) 2021-2024 Arm Limited. 
* * SPDX-License-Identifier: MIT * diff --git a/src/core/NEON/kernels/arm_conv/pooling/kernels/sve_u8_nhwc_max_2x2_s1_output2x2_depthfirst/generic.cpp b/src/core/NEON/kernels/arm_conv/pooling/kernels/sve_u8_nhwc_max_2x2_s1_output2x2_depthfirst/generic.cpp index 8612555bfb..2a7092a291 100644 --- a/src/core/NEON/kernels/arm_conv/pooling/kernels/sve_u8_nhwc_max_2x2_s1_output2x2_depthfirst/generic.cpp +++ b/src/core/NEON/kernels/arm_conv/pooling/kernels/sve_u8_nhwc_max_2x2_s1_output2x2_depthfirst/generic.cpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2021-2023 Arm Limited. + * Copyright (c) 2021-2024 Arm Limited. * * SPDX-License-Identifier: MIT * diff --git a/src/core/NEON/kernels/arm_conv/pooling/kernels/sve_u8_nhwc_max_generic_depthfirst/generic.cpp b/src/core/NEON/kernels/arm_conv/pooling/kernels/sve_u8_nhwc_max_generic_depthfirst/generic.cpp index be0eb398ae..b5617bbfb6 100644 --- a/src/core/NEON/kernels/arm_conv/pooling/kernels/sve_u8_nhwc_max_generic_depthfirst/generic.cpp +++ b/src/core/NEON/kernels/arm_conv/pooling/kernels/sve_u8_nhwc_max_generic_depthfirst/generic.cpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2021-2023 Arm Limited. + * Copyright (c) 2021-2024 Arm Limited. * * SPDX-License-Identifier: MIT * diff --git a/src/core/NEON/kernels/arm_conv/pooling/kernels/sve_u8q_nhwc_avg_generic_depthfirst/generic.cpp b/src/core/NEON/kernels/arm_conv/pooling/kernels/sve_u8q_nhwc_avg_generic_depthfirst/generic.cpp index e8339a2cd9..b840a8df96 100644 --- a/src/core/NEON/kernels/arm_conv/pooling/kernels/sve_u8q_nhwc_avg_generic_depthfirst/generic.cpp +++ b/src/core/NEON/kernels/arm_conv/pooling/kernels/sve_u8q_nhwc_avg_generic_depthfirst/generic.cpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2021-2023 Arm Limited. + * Copyright (c) 2021-2024 Arm Limited. 
* * SPDX-License-Identifier: MIT * diff --git a/src/core/NEON/kernels/arm_conv/pooling/kernels/sve_u8q_nhwc_max_generic_depthfirst/generic.cpp b/src/core/NEON/kernels/arm_conv/pooling/kernels/sve_u8q_nhwc_max_generic_depthfirst/generic.cpp index 94522cdaaa..a79b0bf370 100644 --- a/src/core/NEON/kernels/arm_conv/pooling/kernels/sve_u8q_nhwc_max_generic_depthfirst/generic.cpp +++ b/src/core/NEON/kernels/arm_conv/pooling/kernels/sve_u8q_nhwc_max_generic_depthfirst/generic.cpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2021-2023 Arm Limited. + * Copyright (c) 2021-2024 Arm Limited. * * SPDX-License-Identifier: MIT * diff --git a/src/core/NEON/kernels/arm_conv/pooling/pooling_depthfirst.hpp b/src/core/NEON/kernels/arm_conv/pooling/pooling_depthfirst.hpp index 1ca478513c..51417ed560 100644 --- a/src/core/NEON/kernels/arm_conv/pooling/pooling_depthfirst.hpp +++ b/src/core/NEON/kernels/arm_conv/pooling/pooling_depthfirst.hpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2021-2023 Arm Limited. + * Copyright (c) 2021-2024 Arm Limited. * * SPDX-License-Identifier: MIT * diff --git a/src/core/NEON/kernels/arm_conv/pooling/pooling_depthfirst_generic.hpp b/src/core/NEON/kernels/arm_conv/pooling/pooling_depthfirst_generic.hpp index ded2c75127..9c5c0d7bb6 100644 --- a/src/core/NEON/kernels/arm_conv/pooling/pooling_depthfirst_generic.hpp +++ b/src/core/NEON/kernels/arm_conv/pooling/pooling_depthfirst_generic.hpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2021-2023 Arm Limited. + * Copyright (c) 2021-2024 Arm Limited. * * SPDX-License-Identifier: MIT * diff --git a/src/core/NEON/kernels/arm_gemm/barrier.hpp b/src/core/NEON/kernels/arm_gemm/barrier.hpp index 8fbcddfef8..b7abd87c29 100644 --- a/src/core/NEON/kernels/arm_gemm/barrier.hpp +++ b/src/core/NEON/kernels/arm_gemm/barrier.hpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2019-2020 Arm Limited. + * Copyright (c) 2019-2020, 2024 Arm Limited. 
* * SPDX-License-Identifier: MIT * diff --git a/src/core/NEON/kernels/arm_gemm/gemm_bf16.cpp b/src/core/NEON/kernels/arm_gemm/gemm_bf16.cpp index 0ddca04846..c8bd8fd658 100644 --- a/src/core/NEON/kernels/arm_gemm/gemm_bf16.cpp +++ b/src/core/NEON/kernels/arm_gemm/gemm_bf16.cpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2017-2020, 2022-2023 Arm Limited. + * Copyright (c) 2017-2020, 2022-2024 Arm Limited. * * SPDX-License-Identifier: MIT * diff --git a/src/core/NEON/kernels/arm_gemm/gemm_fp16.cpp b/src/core/NEON/kernels/arm_gemm/gemm_fp16.cpp index c7adf8e4ac..12bddf15e1 100644 --- a/src/core/NEON/kernels/arm_gemm/gemm_fp16.cpp +++ b/src/core/NEON/kernels/arm_gemm/gemm_fp16.cpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2017-2020, 2022-2023 Arm Limited. + * Copyright (c) 2017-2020, 2022-2024 Arm Limited. * * SPDX-License-Identifier: MIT * diff --git a/src/core/NEON/kernels/arm_gemm/gemm_implementation.hpp b/src/core/NEON/kernels/arm_gemm/gemm_implementation.hpp index 5e77df7d4a..db5155f500 100644 --- a/src/core/NEON/kernels/arm_gemm/gemm_implementation.hpp +++ b/src/core/NEON/kernels/arm_gemm/gemm_implementation.hpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2018-2020, 2022-2023 Arm Limited. + * Copyright (c) 2018-2020, 2022-2024 Arm Limited. * * SPDX-License-Identifier: MIT * diff --git a/src/core/NEON/kernels/arm_gemm/gemm_int16.cpp b/src/core/NEON/kernels/arm_gemm/gemm_int16.cpp index aa6ecc2919..befc1a58a3 100644 --- a/src/core/NEON/kernels/arm_gemm/gemm_int16.cpp +++ b/src/core/NEON/kernels/arm_gemm/gemm_int16.cpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2017-2020, 2022-2023 Arm Limited. + * Copyright (c) 2017-2020, 2022-2024 Arm Limited. 
* * SPDX-License-Identifier: MIT * diff --git a/src/core/NEON/kernels/arm_gemm/gemm_uint16.cpp b/src/core/NEON/kernels/arm_gemm/gemm_uint16.cpp index 25b6cf0cf2..44f085c183 100644 --- a/src/core/NEON/kernels/arm_gemm/gemm_uint16.cpp +++ b/src/core/NEON/kernels/arm_gemm/gemm_uint16.cpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2017-2020, 2022-2023 Arm Limited. + * Copyright (c) 2017-2020, 2022-2024 Arm Limited. * * SPDX-License-Identifier: MIT * diff --git a/src/core/NEON/kernels/arm_gemm/indirect-interleaves/a32_interleave6_block1_fp32_fp32.hpp b/src/core/NEON/kernels/arm_gemm/indirect-interleaves/a32_interleave6_block1_fp32_fp32.hpp index 807511f0d2..ffd9d4b22a 100644 --- a/src/core/NEON/kernels/arm_gemm/indirect-interleaves/a32_interleave6_block1_fp32_fp32.hpp +++ b/src/core/NEON/kernels/arm_gemm/indirect-interleaves/a32_interleave6_block1_fp32_fp32.hpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2017-2018 Arm Limited. + * Copyright (c) 2017-2018, 2024 Arm Limited. * * SPDX-License-Identifier: MIT * diff --git a/src/core/NEON/kernels/arm_gemm/indirect-interleaves/a64_interleave4_block16_s8_s8.hpp b/src/core/NEON/kernels/arm_gemm/indirect-interleaves/a64_interleave4_block16_s8_s8.hpp index e4bfc0f6e4..d5a41a332d 100644 --- a/src/core/NEON/kernels/arm_gemm/indirect-interleaves/a64_interleave4_block16_s8_s8.hpp +++ b/src/core/NEON/kernels/arm_gemm/indirect-interleaves/a64_interleave4_block16_s8_s8.hpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2019-2021, 2023 Arm Limited. + * Copyright (c) 2019-2021, 2023-2024 Arm Limited. 
* * SPDX-License-Identifier: MIT * diff --git a/src/core/NEON/kernels/arm_gemm/indirect-interleaves/a64_interleave8_block1_fp16_fp16.hpp b/src/core/NEON/kernels/arm_gemm/indirect-interleaves/a64_interleave8_block1_fp16_fp16.hpp index e54b3b9f41..35de179ed4 100644 --- a/src/core/NEON/kernels/arm_gemm/indirect-interleaves/a64_interleave8_block1_fp16_fp16.hpp +++ b/src/core/NEON/kernels/arm_gemm/indirect-interleaves/a64_interleave8_block1_fp16_fp16.hpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2019-2021, 2023 Arm Limited. + * Copyright (c) 2019-2021, 2023-2024 Arm Limited. * * SPDX-License-Identifier: MIT * diff --git a/src/core/NEON/kernels/arm_gemm/indirect-interleaves/a64_interleave8_block1_fp16_fp32.hpp b/src/core/NEON/kernels/arm_gemm/indirect-interleaves/a64_interleave8_block1_fp16_fp32.hpp index 3a5dcf4a6b..59981e9979 100644 --- a/src/core/NEON/kernels/arm_gemm/indirect-interleaves/a64_interleave8_block1_fp16_fp32.hpp +++ b/src/core/NEON/kernels/arm_gemm/indirect-interleaves/a64_interleave8_block1_fp16_fp32.hpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2019-2021, 2023 Arm Limited. + * Copyright (c) 2019-2021, 2023-2024 Arm Limited. * * SPDX-License-Identifier: MIT * diff --git a/src/core/NEON/kernels/arm_gemm/indirect-interleaves/a64_interleave8_block1_fp32_fp32.hpp b/src/core/NEON/kernels/arm_gemm/indirect-interleaves/a64_interleave8_block1_fp32_fp32.hpp index 80c387db47..9eeabfa9eb 100644 --- a/src/core/NEON/kernels/arm_gemm/indirect-interleaves/a64_interleave8_block1_fp32_fp32.hpp +++ b/src/core/NEON/kernels/arm_gemm/indirect-interleaves/a64_interleave8_block1_fp32_fp32.hpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2019-2021, 2023 Arm Limited. + * Copyright (c) 2019-2021, 2023-2024 Arm Limited. 
* * SPDX-License-Identifier: MIT * diff --git a/src/core/NEON/kernels/arm_gemm/indirect-interleaves/a64_interleave8_block1_s16_s16.hpp b/src/core/NEON/kernels/arm_gemm/indirect-interleaves/a64_interleave8_block1_s16_s16.hpp index 8e06b7ecab..27b3335694 100644 --- a/src/core/NEON/kernels/arm_gemm/indirect-interleaves/a64_interleave8_block1_s16_s16.hpp +++ b/src/core/NEON/kernels/arm_gemm/indirect-interleaves/a64_interleave8_block1_s16_s16.hpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2019-2021, 2023 Arm Limited. + * Copyright (c) 2019-2021, 2023-2024 Arm Limited. * * SPDX-License-Identifier: MIT * diff --git a/src/core/NEON/kernels/arm_gemm/indirect-interleaves/a64_interleave8_block1_s8_s16.hpp b/src/core/NEON/kernels/arm_gemm/indirect-interleaves/a64_interleave8_block1_s8_s16.hpp index c41120c698..4c90691edc 100644 --- a/src/core/NEON/kernels/arm_gemm/indirect-interleaves/a64_interleave8_block1_s8_s16.hpp +++ b/src/core/NEON/kernels/arm_gemm/indirect-interleaves/a64_interleave8_block1_s8_s16.hpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2019-2021, 2023 Arm Limited. + * Copyright (c) 2019-2021, 2023-2024 Arm Limited. * * SPDX-License-Identifier: MIT * diff --git a/src/core/NEON/kernels/arm_gemm/indirect-interleaves/a64_interleave8_block1_u8_u16.hpp b/src/core/NEON/kernels/arm_gemm/indirect-interleaves/a64_interleave8_block1_u8_u16.hpp index d29a995b46..8901908140 100644 --- a/src/core/NEON/kernels/arm_gemm/indirect-interleaves/a64_interleave8_block1_u8_u16.hpp +++ b/src/core/NEON/kernels/arm_gemm/indirect-interleaves/a64_interleave8_block1_u8_u16.hpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2019-2021, 2023 Arm Limited. + * Copyright (c) 2019-2021, 2023-2024 Arm Limited. 
* * SPDX-License-Identifier: MIT * diff --git a/src/core/NEON/kernels/arm_gemm/indirect-interleaves/a64_interleave8_block2_bf16_bf16.hpp b/src/core/NEON/kernels/arm_gemm/indirect-interleaves/a64_interleave8_block2_bf16_bf16.hpp index 43d9d20c10..db610427a8 100644 --- a/src/core/NEON/kernels/arm_gemm/indirect-interleaves/a64_interleave8_block2_bf16_bf16.hpp +++ b/src/core/NEON/kernels/arm_gemm/indirect-interleaves/a64_interleave8_block2_bf16_bf16.hpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2019-2021, 2023 Arm Limited. + * Copyright (c) 2019-2021, 2023-2024 Arm Limited. * * SPDX-License-Identifier: MIT * diff --git a/src/core/NEON/kernels/arm_gemm/indirect-interleaves/a64_interleave8_block2_fp32_fp32.hpp b/src/core/NEON/kernels/arm_gemm/indirect-interleaves/a64_interleave8_block2_fp32_fp32.hpp index 3ec03370a0..b3a52451e8 100644 --- a/src/core/NEON/kernels/arm_gemm/indirect-interleaves/a64_interleave8_block2_fp32_fp32.hpp +++ b/src/core/NEON/kernels/arm_gemm/indirect-interleaves/a64_interleave8_block2_fp32_fp32.hpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2019-2021, 2023 Arm Limited. + * Copyright (c) 2019-2021, 2023-2024 Arm Limited. * * SPDX-License-Identifier: MIT * diff --git a/src/core/NEON/kernels/arm_gemm/indirect-interleaves/a64_interleave8_block4_bf16_bf16.hpp b/src/core/NEON/kernels/arm_gemm/indirect-interleaves/a64_interleave8_block4_bf16_bf16.hpp index e9799f87a9..33639c201e 100644 --- a/src/core/NEON/kernels/arm_gemm/indirect-interleaves/a64_interleave8_block4_bf16_bf16.hpp +++ b/src/core/NEON/kernels/arm_gemm/indirect-interleaves/a64_interleave8_block4_bf16_bf16.hpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2019-2021, 2023 Arm Limited. + * Copyright (c) 2019-2021, 2023-2024 Arm Limited. 
* * SPDX-License-Identifier: MIT * diff --git a/src/core/NEON/kernels/arm_gemm/indirect-interleaves/a64_interleave8_block4_fp32_bf16.hpp b/src/core/NEON/kernels/arm_gemm/indirect-interleaves/a64_interleave8_block4_fp32_bf16.hpp index 730bfd6342..3044cfde48 100644 --- a/src/core/NEON/kernels/arm_gemm/indirect-interleaves/a64_interleave8_block4_fp32_bf16.hpp +++ b/src/core/NEON/kernels/arm_gemm/indirect-interleaves/a64_interleave8_block4_fp32_bf16.hpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2021, 2023 Arm Limited. + * Copyright (c) 2021, 2023-2024 Arm Limited. * * SPDX-License-Identifier: MIT * diff --git a/src/core/NEON/kernels/arm_gemm/indirect-interleaves/a64_interleave8_block4_s8_s8.hpp b/src/core/NEON/kernels/arm_gemm/indirect-interleaves/a64_interleave8_block4_s8_s8.hpp index 15d8ddbe53..8bb9898861 100644 --- a/src/core/NEON/kernels/arm_gemm/indirect-interleaves/a64_interleave8_block4_s8_s8.hpp +++ b/src/core/NEON/kernels/arm_gemm/indirect-interleaves/a64_interleave8_block4_s8_s8.hpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2019-2021, 2023 Arm Limited. + * Copyright (c) 2019-2021, 2023-2024 Arm Limited. * * SPDX-License-Identifier: MIT * diff --git a/src/core/NEON/kernels/arm_gemm/indirect-interleaves/a64_interleave8_block8_s8_s8.hpp b/src/core/NEON/kernels/arm_gemm/indirect-interleaves/a64_interleave8_block8_s8_s8.hpp index 7b445ef3d4..6e1efa3814 100644 --- a/src/core/NEON/kernels/arm_gemm/indirect-interleaves/a64_interleave8_block8_s8_s8.hpp +++ b/src/core/NEON/kernels/arm_gemm/indirect-interleaves/a64_interleave8_block8_s8_s8.hpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2019-2021, 2023 Arm Limited. + * Copyright (c) 2019-2021, 2023-2024 Arm Limited. 
* * SPDX-License-Identifier: MIT * diff --git a/src/core/NEON/kernels/arm_gemm/indirect-interleaves/sme2_interleave1VL_block2_fp32_bf16.hpp b/src/core/NEON/kernels/arm_gemm/indirect-interleaves/sme2_interleave1VL_block2_fp32_bf16.hpp index a5f4754d3d..8ed20a1e48 100644 --- a/src/core/NEON/kernels/arm_gemm/indirect-interleaves/sme2_interleave1VL_block2_fp32_bf16.hpp +++ b/src/core/NEON/kernels/arm_gemm/indirect-interleaves/sme2_interleave1VL_block2_fp32_bf16.hpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2022-2023 Arm Limited. + * Copyright (c) 2022-2024 Arm Limited. * * SPDX-License-Identifier: MIT * diff --git a/src/core/NEON/kernels/arm_gemm/indirect-interleaves/sme2_interleave2VL_block2_fp32_bf16.hpp b/src/core/NEON/kernels/arm_gemm/indirect-interleaves/sme2_interleave2VL_block2_fp32_bf16.hpp index c1d0ac5bc7..aaa01039cf 100644 --- a/src/core/NEON/kernels/arm_gemm/indirect-interleaves/sme2_interleave2VL_block2_fp32_bf16.hpp +++ b/src/core/NEON/kernels/arm_gemm/indirect-interleaves/sme2_interleave2VL_block2_fp32_bf16.hpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2022-2023 Arm Limited. + * Copyright (c) 2022-2024 Arm Limited. * * SPDX-License-Identifier: MIT * diff --git a/src/core/NEON/kernels/arm_gemm/indirect-interleaves/sme2_interleave4VL_block2_fp32_bf16.hpp b/src/core/NEON/kernels/arm_gemm/indirect-interleaves/sme2_interleave4VL_block2_fp32_bf16.hpp index 03575d7ff2..83a7f62693 100644 --- a/src/core/NEON/kernels/arm_gemm/indirect-interleaves/sme2_interleave4VL_block2_fp32_bf16.hpp +++ b/src/core/NEON/kernels/arm_gemm/indirect-interleaves/sme2_interleave4VL_block2_fp32_bf16.hpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2022-2023 Arm Limited. + * Copyright (c) 2022-2024 Arm Limited. 
* * SPDX-License-Identifier: MIT * diff --git a/src/core/NEON/kernels/arm_gemm/indirect-interleaves/sme_interleave1VL_bf16_bf16.hpp b/src/core/NEON/kernels/arm_gemm/indirect-interleaves/sme_interleave1VL_bf16_bf16.hpp index 453778ae3f..45f660fec1 100644 --- a/src/core/NEON/kernels/arm_gemm/indirect-interleaves/sme_interleave1VL_bf16_bf16.hpp +++ b/src/core/NEON/kernels/arm_gemm/indirect-interleaves/sme_interleave1VL_bf16_bf16.hpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2022-2023 Arm Limited. + * Copyright (c) 2022-2024 Arm Limited. * * SPDX-License-Identifier: MIT * diff --git a/src/core/NEON/kernels/arm_gemm/indirect-interleaves/sme_interleave1VL_block2_bf16_bf16.hpp b/src/core/NEON/kernels/arm_gemm/indirect-interleaves/sme_interleave1VL_block2_bf16_bf16.hpp index 98bdcd2fa2..ce7192afe6 100644 --- a/src/core/NEON/kernels/arm_gemm/indirect-interleaves/sme_interleave1VL_block2_bf16_bf16.hpp +++ b/src/core/NEON/kernels/arm_gemm/indirect-interleaves/sme_interleave1VL_block2_bf16_bf16.hpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2022-2023 Arm Limited. + * Copyright (c) 2022-2024 Arm Limited. * * SPDX-License-Identifier: MIT * diff --git a/src/core/NEON/kernels/arm_gemm/indirect-interleaves/sme_interleave1VL_block2_fp16_fp16.hpp b/src/core/NEON/kernels/arm_gemm/indirect-interleaves/sme_interleave1VL_block2_fp16_fp16.hpp index 30c3e42aed..982cfa6d40 100644 --- a/src/core/NEON/kernels/arm_gemm/indirect-interleaves/sme_interleave1VL_block2_fp16_fp16.hpp +++ b/src/core/NEON/kernels/arm_gemm/indirect-interleaves/sme_interleave1VL_block2_fp16_fp16.hpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2023 Arm Limited. + * Copyright (c) 2023-2024 Arm Limited. * * SPDX-License-Identifier: MIT * @@ -10,16 +10,16 @@ * sell copies of the Software, and to permit persons to whom the Software is * furnished to do so, subject to the following conditions: * - * The above copyright notice and this permission notice shall be included in - * all copies or substantial portions of the Software. 
+ * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software. * * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING - * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS - * IN THE SOFTWARE. + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. */ #if defined(__ARM_FEATURE_SVE) diff --git a/src/core/NEON/kernels/arm_gemm/indirect-interleaves/sme_interleave1VL_block4_s8_s8.hpp b/src/core/NEON/kernels/arm_gemm/indirect-interleaves/sme_interleave1VL_block4_s8_s8.hpp index 4390bb7c7f..79cd668a84 100644 --- a/src/core/NEON/kernels/arm_gemm/indirect-interleaves/sme_interleave1VL_block4_s8_s8.hpp +++ b/src/core/NEON/kernels/arm_gemm/indirect-interleaves/sme_interleave1VL_block4_s8_s8.hpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2022-2023 Arm Limited. + * Copyright (c) 2022-2024 Arm Limited. * * SPDX-License-Identifier: MIT * diff --git a/src/core/NEON/kernels/arm_gemm/indirect-interleaves/sme_interleave1VL_block4_s8_s8_summing.hpp b/src/core/NEON/kernels/arm_gemm/indirect-interleaves/sme_interleave1VL_block4_s8_s8_summing.hpp index f5ee261964..fe98bd86b5 100644 --- a/src/core/NEON/kernels/arm_gemm/indirect-interleaves/sme_interleave1VL_block4_s8_s8_summing.hpp +++ b/src/core/NEON/kernels/arm_gemm/indirect-interleaves/sme_interleave1VL_block4_s8_s8_summing.hpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2022-2023 Arm Limited. + * Copyright (c) 2022-2024 Arm Limited. 
* * SPDX-License-Identifier: MIT * diff --git a/src/core/NEON/kernels/arm_gemm/indirect-interleaves/sme_interleave1VL_block4_u8_u8.hpp b/src/core/NEON/kernels/arm_gemm/indirect-interleaves/sme_interleave1VL_block4_u8_u8.hpp index 76c1d053cd..bc7d013798 100644 --- a/src/core/NEON/kernels/arm_gemm/indirect-interleaves/sme_interleave1VL_block4_u8_u8.hpp +++ b/src/core/NEON/kernels/arm_gemm/indirect-interleaves/sme_interleave1VL_block4_u8_u8.hpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2022-2023 Arm Limited. + * Copyright (c) 2022-2024 Arm Limited. * * SPDX-License-Identifier: MIT * diff --git a/src/core/NEON/kernels/arm_gemm/indirect-interleaves/sme_interleave1VL_block4_u8_u8_summing.hpp b/src/core/NEON/kernels/arm_gemm/indirect-interleaves/sme_interleave1VL_block4_u8_u8_summing.hpp index daf2d3a100..66fcd800d4 100644 --- a/src/core/NEON/kernels/arm_gemm/indirect-interleaves/sme_interleave1VL_block4_u8_u8_summing.hpp +++ b/src/core/NEON/kernels/arm_gemm/indirect-interleaves/sme_interleave1VL_block4_u8_u8_summing.hpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2022-2023 Arm Limited. + * Copyright (c) 2022-2024 Arm Limited. * * SPDX-License-Identifier: MIT * diff --git a/src/core/NEON/kernels/arm_gemm/indirect-interleaves/sme_interleave1VL_fp16_fp16.hpp b/src/core/NEON/kernels/arm_gemm/indirect-interleaves/sme_interleave1VL_fp16_fp16.hpp index 274f69f370..fb0a74b3bd 100644 --- a/src/core/NEON/kernels/arm_gemm/indirect-interleaves/sme_interleave1VL_fp16_fp16.hpp +++ b/src/core/NEON/kernels/arm_gemm/indirect-interleaves/sme_interleave1VL_fp16_fp16.hpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2022-2023 Arm Limited. + * Copyright (c) 2022-2024 Arm Limited. 
* * SPDX-License-Identifier: MIT * diff --git a/src/core/NEON/kernels/arm_gemm/indirect-interleaves/sme_interleave1VL_fp32_fp32.hpp b/src/core/NEON/kernels/arm_gemm/indirect-interleaves/sme_interleave1VL_fp32_fp32.hpp index ab290649fd..3fe3885068 100644 --- a/src/core/NEON/kernels/arm_gemm/indirect-interleaves/sme_interleave1VL_fp32_fp32.hpp +++ b/src/core/NEON/kernels/arm_gemm/indirect-interleaves/sme_interleave1VL_fp32_fp32.hpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2022-2023 Arm Limited. + * Copyright (c) 2022-2024 Arm Limited. * * SPDX-License-Identifier: MIT * diff --git a/src/core/NEON/kernels/arm_gemm/indirect-interleaves/sme_interleave2VL_bf16_bf16.hpp b/src/core/NEON/kernels/arm_gemm/indirect-interleaves/sme_interleave2VL_bf16_bf16.hpp index dc6d12b61e..1ed835b21b 100644 --- a/src/core/NEON/kernels/arm_gemm/indirect-interleaves/sme_interleave2VL_bf16_bf16.hpp +++ b/src/core/NEON/kernels/arm_gemm/indirect-interleaves/sme_interleave2VL_bf16_bf16.hpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2022-2023 Arm Limited. + * Copyright (c) 2022-2024 Arm Limited. * * SPDX-License-Identifier: MIT * diff --git a/src/core/NEON/kernels/arm_gemm/indirect-interleaves/sme_interleave2VL_block2_bf16_bf16.hpp b/src/core/NEON/kernels/arm_gemm/indirect-interleaves/sme_interleave2VL_block2_bf16_bf16.hpp index d9189258c1..715810ddea 100644 --- a/src/core/NEON/kernels/arm_gemm/indirect-interleaves/sme_interleave2VL_block2_bf16_bf16.hpp +++ b/src/core/NEON/kernels/arm_gemm/indirect-interleaves/sme_interleave2VL_block2_bf16_bf16.hpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2022-2023 Arm Limited. + * Copyright (c) 2022-2024 Arm Limited. 
* * SPDX-License-Identifier: MIT * diff --git a/src/core/NEON/kernels/arm_gemm/indirect-interleaves/sme_interleave2VL_block2_fp16_fp16.hpp b/src/core/NEON/kernels/arm_gemm/indirect-interleaves/sme_interleave2VL_block2_fp16_fp16.hpp index ef787c89b9..849f6c3228 100644 --- a/src/core/NEON/kernels/arm_gemm/indirect-interleaves/sme_interleave2VL_block2_fp16_fp16.hpp +++ b/src/core/NEON/kernels/arm_gemm/indirect-interleaves/sme_interleave2VL_block2_fp16_fp16.hpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2022-2023 Arm Limited. + * Copyright (c) 2022-2024 Arm Limited. * * SPDX-License-Identifier: MIT * diff --git a/src/core/NEON/kernels/arm_gemm/indirect-interleaves/sme_interleave2VL_block4_s8_s8.hpp b/src/core/NEON/kernels/arm_gemm/indirect-interleaves/sme_interleave2VL_block4_s8_s8.hpp index 905c6b41eb..2d6e1ce6c7 100644 --- a/src/core/NEON/kernels/arm_gemm/indirect-interleaves/sme_interleave2VL_block4_s8_s8.hpp +++ b/src/core/NEON/kernels/arm_gemm/indirect-interleaves/sme_interleave2VL_block4_s8_s8.hpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2022-2023 Arm Limited. + * Copyright (c) 2022-2024 Arm Limited. * * SPDX-License-Identifier: MIT * diff --git a/src/core/NEON/kernels/arm_gemm/indirect-interleaves/sme_interleave2VL_block4_s8_s8_summing.hpp b/src/core/NEON/kernels/arm_gemm/indirect-interleaves/sme_interleave2VL_block4_s8_s8_summing.hpp index c5c5af20e2..27b9bc3806 100644 --- a/src/core/NEON/kernels/arm_gemm/indirect-interleaves/sme_interleave2VL_block4_s8_s8_summing.hpp +++ b/src/core/NEON/kernels/arm_gemm/indirect-interleaves/sme_interleave2VL_block4_s8_s8_summing.hpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2022-2023 Arm Limited. + * Copyright (c) 2022-2024 Arm Limited. 
* * SPDX-License-Identifier: MIT * diff --git a/src/core/NEON/kernels/arm_gemm/indirect-interleaves/sme_interleave2VL_block4_u8_u8.hpp b/src/core/NEON/kernels/arm_gemm/indirect-interleaves/sme_interleave2VL_block4_u8_u8.hpp index ce9a0065c7..3f3863720a 100644 --- a/src/core/NEON/kernels/arm_gemm/indirect-interleaves/sme_interleave2VL_block4_u8_u8.hpp +++ b/src/core/NEON/kernels/arm_gemm/indirect-interleaves/sme_interleave2VL_block4_u8_u8.hpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2022-2023 Arm Limited. + * Copyright (c) 2022-2024 Arm Limited. * * SPDX-License-Identifier: MIT * diff --git a/src/core/NEON/kernels/arm_gemm/indirect-interleaves/sme_interleave2VL_block4_u8_u8_summing.hpp b/src/core/NEON/kernels/arm_gemm/indirect-interleaves/sme_interleave2VL_block4_u8_u8_summing.hpp index 7805152656..c8657fad1c 100644 --- a/src/core/NEON/kernels/arm_gemm/indirect-interleaves/sme_interleave2VL_block4_u8_u8_summing.hpp +++ b/src/core/NEON/kernels/arm_gemm/indirect-interleaves/sme_interleave2VL_block4_u8_u8_summing.hpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2022-2023 Arm Limited. + * Copyright (c) 2022-2024 Arm Limited. * * SPDX-License-Identifier: MIT * diff --git a/src/core/NEON/kernels/arm_gemm/indirect-interleaves/sme_interleave2VL_fp16_fp16.hpp b/src/core/NEON/kernels/arm_gemm/indirect-interleaves/sme_interleave2VL_fp16_fp16.hpp index 96ab55ee06..9403efc7c6 100644 --- a/src/core/NEON/kernels/arm_gemm/indirect-interleaves/sme_interleave2VL_fp16_fp16.hpp +++ b/src/core/NEON/kernels/arm_gemm/indirect-interleaves/sme_interleave2VL_fp16_fp16.hpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2022-2023 Arm Limited. + * Copyright (c) 2022-2024 Arm Limited. 
* * SPDX-License-Identifier: MIT * diff --git a/src/core/NEON/kernels/arm_gemm/indirect-interleaves/sme_interleave2VL_fp32_fp32.hpp b/src/core/NEON/kernels/arm_gemm/indirect-interleaves/sme_interleave2VL_fp32_fp32.hpp index ac4b1b5086..b310651525 100644 --- a/src/core/NEON/kernels/arm_gemm/indirect-interleaves/sme_interleave2VL_fp32_fp32.hpp +++ b/src/core/NEON/kernels/arm_gemm/indirect-interleaves/sme_interleave2VL_fp32_fp32.hpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2022-2023 Arm Limited. + * Copyright (c) 2022-2024 Arm Limited. * * SPDX-License-Identifier: MIT * diff --git a/src/core/NEON/kernels/arm_gemm/indirect-interleaves/sme_interleave4VL_block2_bf16_bf16.hpp b/src/core/NEON/kernels/arm_gemm/indirect-interleaves/sme_interleave4VL_block2_bf16_bf16.hpp index 2e53475b5c..6903945536 100644 --- a/src/core/NEON/kernels/arm_gemm/indirect-interleaves/sme_interleave4VL_block2_bf16_bf16.hpp +++ b/src/core/NEON/kernels/arm_gemm/indirect-interleaves/sme_interleave4VL_block2_bf16_bf16.hpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2022-2023 Arm Limited. + * Copyright (c) 2022-2024 Arm Limited. * * SPDX-License-Identifier: MIT * diff --git a/src/core/NEON/kernels/arm_gemm/indirect-interleaves/sme_interleave4VL_block2_fp16_fp16.hpp b/src/core/NEON/kernels/arm_gemm/indirect-interleaves/sme_interleave4VL_block2_fp16_fp16.hpp index 268bdbb924..b1ba9a4fe7 100644 --- a/src/core/NEON/kernels/arm_gemm/indirect-interleaves/sme_interleave4VL_block2_fp16_fp16.hpp +++ b/src/core/NEON/kernels/arm_gemm/indirect-interleaves/sme_interleave4VL_block2_fp16_fp16.hpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2023 Arm Limited. + * Copyright (c) 2023-2024 Arm Limited. * * SPDX-License-Identifier: MIT * @@ -10,16 +10,16 @@ * sell copies of the Software, and to permit persons to whom the Software is * furnished to do so, subject to the following conditions: * - * The above copyright notice and this permission notice shall be included in - * all copies or substantial portions of the Software. 
+ * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software. * * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING - * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS - * IN THE SOFTWARE. + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. */ #if defined(__ARM_FEATURE_SVE) diff --git a/src/core/NEON/kernels/arm_gemm/indirect-interleaves/sme_interleave4VL_block4_s8_s8.hpp b/src/core/NEON/kernels/arm_gemm/indirect-interleaves/sme_interleave4VL_block4_s8_s8.hpp index 67dd5a9bb7..9a5521aa8a 100644 --- a/src/core/NEON/kernels/arm_gemm/indirect-interleaves/sme_interleave4VL_block4_s8_s8.hpp +++ b/src/core/NEON/kernels/arm_gemm/indirect-interleaves/sme_interleave4VL_block4_s8_s8.hpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2022-2023 Arm Limited. + * Copyright (c) 2022-2024 Arm Limited. * * SPDX-License-Identifier: MIT * diff --git a/src/core/NEON/kernels/arm_gemm/indirect-interleaves/sme_interleave4VL_block4_s8_s8_summing.hpp b/src/core/NEON/kernels/arm_gemm/indirect-interleaves/sme_interleave4VL_block4_s8_s8_summing.hpp index 21d9378368..22ec1011cc 100644 --- a/src/core/NEON/kernels/arm_gemm/indirect-interleaves/sme_interleave4VL_block4_s8_s8_summing.hpp +++ b/src/core/NEON/kernels/arm_gemm/indirect-interleaves/sme_interleave4VL_block4_s8_s8_summing.hpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2022-2023 Arm Limited. + * Copyright (c) 2022-2024 Arm Limited. 
* * SPDX-License-Identifier: MIT * diff --git a/src/core/NEON/kernels/arm_gemm/indirect-interleaves/sme_interleave4VL_block4_u8_u8.hpp b/src/core/NEON/kernels/arm_gemm/indirect-interleaves/sme_interleave4VL_block4_u8_u8.hpp index f149c93293..a99c2ea91b 100644 --- a/src/core/NEON/kernels/arm_gemm/indirect-interleaves/sme_interleave4VL_block4_u8_u8.hpp +++ b/src/core/NEON/kernels/arm_gemm/indirect-interleaves/sme_interleave4VL_block4_u8_u8.hpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2022-2023 Arm Limited. + * Copyright (c) 2022-2024 Arm Limited. * * SPDX-License-Identifier: MIT * diff --git a/src/core/NEON/kernels/arm_gemm/indirect-interleaves/sme_interleave4VL_block4_u8_u8_summing.hpp b/src/core/NEON/kernels/arm_gemm/indirect-interleaves/sme_interleave4VL_block4_u8_u8_summing.hpp index 252152e3da..0a826d38f2 100644 --- a/src/core/NEON/kernels/arm_gemm/indirect-interleaves/sme_interleave4VL_block4_u8_u8_summing.hpp +++ b/src/core/NEON/kernels/arm_gemm/indirect-interleaves/sme_interleave4VL_block4_u8_u8_summing.hpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2022-2023 Arm Limited. + * Copyright (c) 2022-2024 Arm Limited. * * SPDX-License-Identifier: MIT * diff --git a/src/core/NEON/kernels/arm_gemm/indirect-interleaves/sme_interleave4VL_fp32_fp32.hpp b/src/core/NEON/kernels/arm_gemm/indirect-interleaves/sme_interleave4VL_fp32_fp32.hpp index b11bb93c42..f6326100b7 100644 --- a/src/core/NEON/kernels/arm_gemm/indirect-interleaves/sme_interleave4VL_fp32_fp32.hpp +++ b/src/core/NEON/kernels/arm_gemm/indirect-interleaves/sme_interleave4VL_fp32_fp32.hpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2022-2023 Arm Limited. + * Copyright (c) 2022-2024 Arm Limited. 
* * SPDX-License-Identifier: MIT * diff --git a/src/core/NEON/kernels/arm_gemm/kernels/a32_sgemm_8x6.hpp b/src/core/NEON/kernels/arm_gemm/kernels/a32_sgemm_8x6.hpp index ef175beeb7..935958b224 100644 --- a/src/core/NEON/kernels/arm_gemm/kernels/a32_sgemm_8x6.hpp +++ b/src/core/NEON/kernels/arm_gemm/kernels/a32_sgemm_8x6.hpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2017-2018 Arm Limited. + * Copyright (c) 2017-2018, 2024 Arm Limited. * * SPDX-License-Identifier: MIT * diff --git a/src/core/NEON/kernels/arm_gemm/kernels/a32_sgemm_8x6/a53.cpp b/src/core/NEON/kernels/arm_gemm/kernels/a32_sgemm_8x6/a53.cpp index 8a98f667f4..32c9515582 100644 --- a/src/core/NEON/kernels/arm_gemm/kernels/a32_sgemm_8x6/a53.cpp +++ b/src/core/NEON/kernels/arm_gemm/kernels/a32_sgemm_8x6/a53.cpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2017-2018 Arm Limited. + * Copyright (c) 2017-2018, 2024 Arm Limited. * * SPDX-License-Identifier: MIT * diff --git a/src/core/NEON/kernels/arm_gemm/kernels/a32_sgemm_8x6/a55r1.cpp b/src/core/NEON/kernels/arm_gemm/kernels/a32_sgemm_8x6/a55r1.cpp index 8126826998..972b66ed50 100644 --- a/src/core/NEON/kernels/arm_gemm/kernels/a32_sgemm_8x6/a55r1.cpp +++ b/src/core/NEON/kernels/arm_gemm/kernels/a32_sgemm_8x6/a55r1.cpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2017-2018 Arm Limited. + * Copyright (c) 2017-2018, 2024 Arm Limited. * * SPDX-License-Identifier: MIT * diff --git a/src/core/NEON/kernels/arm_gemm/kernels/a32_sgemm_8x6/generic.cpp b/src/core/NEON/kernels/arm_gemm/kernels/a32_sgemm_8x6/generic.cpp index a7494d500c..877247e052 100644 --- a/src/core/NEON/kernels/arm_gemm/kernels/a32_sgemm_8x6/generic.cpp +++ b/src/core/NEON/kernels/arm_gemm/kernels/a32_sgemm_8x6/generic.cpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2017-2018 Arm Limited. + * Copyright (c) 2017-2018, 2024 Arm Limited. 
* * SPDX-License-Identifier: MIT * diff --git a/src/core/NEON/kernels/arm_gemm/kernels/a64_ffhybrid_bf16fp32_mmla_6x16.hpp b/src/core/NEON/kernels/arm_gemm/kernels/a64_ffhybrid_bf16fp32_mmla_6x16.hpp index 72e414969e..8d4146ab3a 100644 --- a/src/core/NEON/kernels/arm_gemm/kernels/a64_ffhybrid_bf16fp32_mmla_6x16.hpp +++ b/src/core/NEON/kernels/arm_gemm/kernels/a64_ffhybrid_bf16fp32_mmla_6x16.hpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2022-2023 Arm Limited. + * Copyright (c) 2022-2024 Arm Limited. * * SPDX-License-Identifier: MIT * diff --git a/src/core/NEON/kernels/arm_gemm/kernels/a64_ffhybrid_bf16fp32_mmla_6x16/generic.cpp b/src/core/NEON/kernels/arm_gemm/kernels/a64_ffhybrid_bf16fp32_mmla_6x16/generic.cpp index 377daddae9..d713ca5d53 100644 --- a/src/core/NEON/kernels/arm_gemm/kernels/a64_ffhybrid_bf16fp32_mmla_6x16/generic.cpp +++ b/src/core/NEON/kernels/arm_gemm/kernels/a64_ffhybrid_bf16fp32_mmla_6x16/generic.cpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2022-2023 Arm Limited. + * Copyright (c) 2022-2024 Arm Limited. * * SPDX-License-Identifier: MIT * diff --git a/src/core/NEON/kernels/arm_gemm/kernels/a64_ffhybrid_fp16_mla_6x32.hpp b/src/core/NEON/kernels/arm_gemm/kernels/a64_ffhybrid_fp16_mla_6x32.hpp index 4924b3a549..f6a7461740 100644 --- a/src/core/NEON/kernels/arm_gemm/kernels/a64_ffhybrid_fp16_mla_6x32.hpp +++ b/src/core/NEON/kernels/arm_gemm/kernels/a64_ffhybrid_fp16_mla_6x32.hpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2022-2023 Arm Limited. + * Copyright (c) 2022-2024 Arm Limited. * * SPDX-License-Identifier: MIT * diff --git a/src/core/NEON/kernels/arm_gemm/kernels/a64_ffhybrid_fp16_mla_6x32/generic.cpp b/src/core/NEON/kernels/arm_gemm/kernels/a64_ffhybrid_fp16_mla_6x32/generic.cpp index 8038612200..2686b98092 100644 --- a/src/core/NEON/kernels/arm_gemm/kernels/a64_ffhybrid_fp16_mla_6x32/generic.cpp +++ b/src/core/NEON/kernels/arm_gemm/kernels/a64_ffhybrid_fp16_mla_6x32/generic.cpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2022-2023 Arm Limited. 
+ * Copyright (c) 2022-2024 Arm Limited. * * SPDX-License-Identifier: MIT * diff --git a/src/core/NEON/kernels/arm_gemm/kernels/a64_ffhybrid_fp32_mla_6x16.hpp b/src/core/NEON/kernels/arm_gemm/kernels/a64_ffhybrid_fp32_mla_6x16.hpp index 94fb84e409..8f2c7fdec5 100644 --- a/src/core/NEON/kernels/arm_gemm/kernels/a64_ffhybrid_fp32_mla_6x16.hpp +++ b/src/core/NEON/kernels/arm_gemm/kernels/a64_ffhybrid_fp32_mla_6x16.hpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2022-2023 Arm Limited. + * Copyright (c) 2022-2024 Arm Limited. * * SPDX-License-Identifier: MIT * diff --git a/src/core/NEON/kernels/arm_gemm/kernels/a64_ffhybrid_fp32_mla_6x16/generic.cpp b/src/core/NEON/kernels/arm_gemm/kernels/a64_ffhybrid_fp32_mla_6x16/generic.cpp index b1cd6dc970..c6c5b047ca 100644 --- a/src/core/NEON/kernels/arm_gemm/kernels/a64_ffhybrid_fp32_mla_6x16/generic.cpp +++ b/src/core/NEON/kernels/arm_gemm/kernels/a64_ffhybrid_fp32_mla_6x16/generic.cpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2022-2023 Arm Limited. + * Copyright (c) 2022-2024 Arm Limited. * * SPDX-License-Identifier: MIT * diff --git a/src/core/NEON/kernels/arm_gemm/kernels/a64_ffhybrid_fp32bf16fp32_mmla_4x24/generic.cpp b/src/core/NEON/kernels/arm_gemm/kernels/a64_ffhybrid_fp32bf16fp32_mmla_4x24/generic.cpp index 8961e615d7..88547ef3b3 100644 --- a/src/core/NEON/kernels/arm_gemm/kernels/a64_ffhybrid_fp32bf16fp32_mmla_4x24/generic.cpp +++ b/src/core/NEON/kernels/arm_gemm/kernels/a64_ffhybrid_fp32bf16fp32_mmla_4x24/generic.cpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2022-2023 Arm Limited. + * Copyright (c) 2022-2024 Arm Limited. 
* * SPDX-License-Identifier: MIT * diff --git a/src/core/NEON/kernels/arm_gemm/kernels/a64_ffinterleaved_bf16fp32_dot_8x12.hpp b/src/core/NEON/kernels/arm_gemm/kernels/a64_ffinterleaved_bf16fp32_dot_8x12.hpp index 745f89eff6..49973ddb92 100644 --- a/src/core/NEON/kernels/arm_gemm/kernels/a64_ffinterleaved_bf16fp32_dot_8x12.hpp +++ b/src/core/NEON/kernels/arm_gemm/kernels/a64_ffinterleaved_bf16fp32_dot_8x12.hpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2022-2023 Arm Limited. + * Copyright (c) 2022-2024 Arm Limited. * * SPDX-License-Identifier: MIT * diff --git a/src/core/NEON/kernels/arm_gemm/kernels/a64_ffinterleaved_bf16fp32_dot_8x12/generic.cpp b/src/core/NEON/kernels/arm_gemm/kernels/a64_ffinterleaved_bf16fp32_dot_8x12/generic.cpp index 5f4fcac690..6fd5fd4b6c 100644 --- a/src/core/NEON/kernels/arm_gemm/kernels/a64_ffinterleaved_bf16fp32_dot_8x12/generic.cpp +++ b/src/core/NEON/kernels/arm_gemm/kernels/a64_ffinterleaved_bf16fp32_dot_8x12/generic.cpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2022-2023 Arm Limited. + * Copyright (c) 2022-2024 Arm Limited. * * SPDX-License-Identifier: MIT * diff --git a/src/core/NEON/kernels/arm_gemm/kernels/a64_ffinterleaved_bf16fp32_mmla_8x12/generic.cpp b/src/core/NEON/kernels/arm_gemm/kernels/a64_ffinterleaved_bf16fp32_mmla_8x12/generic.cpp index 4a1c1b5638..658491571d 100644 --- a/src/core/NEON/kernels/arm_gemm/kernels/a64_ffinterleaved_bf16fp32_mmla_8x12/generic.cpp +++ b/src/core/NEON/kernels/arm_gemm/kernels/a64_ffinterleaved_bf16fp32_mmla_8x12/generic.cpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2022-2023 Arm Limited. + * Copyright (c) 2022-2024 Arm Limited. 
* * SPDX-License-Identifier: MIT * diff --git a/src/core/NEON/kernels/arm_gemm/kernels/a64_ffinterleaved_fp16_mla_8x24.hpp b/src/core/NEON/kernels/arm_gemm/kernels/a64_ffinterleaved_fp16_mla_8x24.hpp index b9b4ad54df..29e524a89b 100644 --- a/src/core/NEON/kernels/arm_gemm/kernels/a64_ffinterleaved_fp16_mla_8x24.hpp +++ b/src/core/NEON/kernels/arm_gemm/kernels/a64_ffinterleaved_fp16_mla_8x24.hpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2022-2023 Arm Limited. + * Copyright (c) 2022-2024 Arm Limited. * * SPDX-License-Identifier: MIT * diff --git a/src/core/NEON/kernels/arm_gemm/kernels/a64_ffinterleaved_fp16_mla_8x24/generic.cpp b/src/core/NEON/kernels/arm_gemm/kernels/a64_ffinterleaved_fp16_mla_8x24/generic.cpp index 1e3f2f300b..744d05dbdf 100644 --- a/src/core/NEON/kernels/arm_gemm/kernels/a64_ffinterleaved_fp16_mla_8x24/generic.cpp +++ b/src/core/NEON/kernels/arm_gemm/kernels/a64_ffinterleaved_fp16_mla_8x24/generic.cpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2022-2023 Arm Limited. + * Copyright (c) 2022-2024 Arm Limited. * * SPDX-License-Identifier: MIT * diff --git a/src/core/NEON/kernels/arm_gemm/kernels/a64_ffinterleaved_fp32_mla_8x12.hpp b/src/core/NEON/kernels/arm_gemm/kernels/a64_ffinterleaved_fp32_mla_8x12.hpp index c4445ba14a..6b01ffe63b 100644 --- a/src/core/NEON/kernels/arm_gemm/kernels/a64_ffinterleaved_fp32_mla_8x12.hpp +++ b/src/core/NEON/kernels/arm_gemm/kernels/a64_ffinterleaved_fp32_mla_8x12.hpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2022-2023 Arm Limited. + * Copyright (c) 2022-2024 Arm Limited. 
* * SPDX-License-Identifier: MIT * diff --git a/src/core/NEON/kernels/arm_gemm/kernels/a64_ffinterleaved_fp32_mla_8x12/generic.cpp b/src/core/NEON/kernels/arm_gemm/kernels/a64_ffinterleaved_fp32_mla_8x12/generic.cpp index 6de0a380eb..f93bc6c719 100644 --- a/src/core/NEON/kernels/arm_gemm/kernels/a64_ffinterleaved_fp32_mla_8x12/generic.cpp +++ b/src/core/NEON/kernels/arm_gemm/kernels/a64_ffinterleaved_fp32_mla_8x12/generic.cpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2022-2023 Arm Limited. + * Copyright (c) 2022-2024 Arm Limited. * * SPDX-License-Identifier: MIT * diff --git a/src/core/NEON/kernels/arm_gemm/kernels/a64_gemm_s16_8x12.hpp b/src/core/NEON/kernels/arm_gemm/kernels/a64_gemm_s16_8x12.hpp index 8bf8d8442e..059c1f6d4d 100644 --- a/src/core/NEON/kernels/arm_gemm/kernels/a64_gemm_s16_8x12.hpp +++ b/src/core/NEON/kernels/arm_gemm/kernels/a64_gemm_s16_8x12.hpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2017-2020 Arm Limited. + * Copyright (c) 2017-2020, 2024 Arm Limited. * * SPDX-License-Identifier: MIT * diff --git a/src/core/NEON/kernels/arm_gemm/kernels/a64_gemm_s8_4x4.hpp b/src/core/NEON/kernels/arm_gemm/kernels/a64_gemm_s8_4x4.hpp index 1363b939ab..226c13b400 100644 --- a/src/core/NEON/kernels/arm_gemm/kernels/a64_gemm_s8_4x4.hpp +++ b/src/core/NEON/kernels/arm_gemm/kernels/a64_gemm_s8_4x4.hpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2017-2022 Arm Limited. + * Copyright (c) 2017-2022, 2024 Arm Limited. * * SPDX-License-Identifier: MIT * diff --git a/src/core/NEON/kernels/arm_gemm/kernels/a64_gemm_s8_4x4/generic.cpp b/src/core/NEON/kernels/arm_gemm/kernels/a64_gemm_s8_4x4/generic.cpp index 3b9a85577e..1396c6ae8c 100644 --- a/src/core/NEON/kernels/arm_gemm/kernels/a64_gemm_s8_4x4/generic.cpp +++ b/src/core/NEON/kernels/arm_gemm/kernels/a64_gemm_s8_4x4/generic.cpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2017 Arm Limited. + * Copyright (c) 2017, 2024 Arm Limited. 
* * SPDX-License-Identifier: MIT * diff --git a/src/core/NEON/kernels/arm_gemm/kernels/a64_gemm_s8_8x12.hpp b/src/core/NEON/kernels/arm_gemm/kernels/a64_gemm_s8_8x12.hpp index 9af1b4df12..cc4b81ecb5 100644 --- a/src/core/NEON/kernels/arm_gemm/kernels/a64_gemm_s8_8x12.hpp +++ b/src/core/NEON/kernels/arm_gemm/kernels/a64_gemm_s8_8x12.hpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2017-2021 Arm Limited. + * Copyright (c) 2017-2021, 2024 Arm Limited. * * SPDX-License-Identifier: MIT * diff --git a/src/core/NEON/kernels/arm_gemm/kernels/a64_gemm_s8_8x12/a55r1.cpp b/src/core/NEON/kernels/arm_gemm/kernels/a64_gemm_s8_8x12/a55r1.cpp index bb5226e093..5ba14d2409 100644 --- a/src/core/NEON/kernels/arm_gemm/kernels/a64_gemm_s8_8x12/a55r1.cpp +++ b/src/core/NEON/kernels/arm_gemm/kernels/a64_gemm_s8_8x12/a55r1.cpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2017-2018 Arm Limited. + * Copyright (c) 2017-2018, 2024 Arm Limited. * * SPDX-License-Identifier: MIT * diff --git a/src/core/NEON/kernels/arm_gemm/kernels/a64_gemm_s8_8x12/generic.cpp b/src/core/NEON/kernels/arm_gemm/kernels/a64_gemm_s8_8x12/generic.cpp index 7bf36a5900..98bff14104 100644 --- a/src/core/NEON/kernels/arm_gemm/kernels/a64_gemm_s8_8x12/generic.cpp +++ b/src/core/NEON/kernels/arm_gemm/kernels/a64_gemm_s8_8x12/generic.cpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2017-2018 Arm Limited. + * Copyright (c) 2017-2018, 2024 Arm Limited. * * SPDX-License-Identifier: MIT * diff --git a/src/core/NEON/kernels/arm_gemm/kernels/a64_gemm_s8_8x12/x1.cpp b/src/core/NEON/kernels/arm_gemm/kernels/a64_gemm_s8_8x12/x1.cpp index afd2427b85..1f9d001553 100644 --- a/src/core/NEON/kernels/arm_gemm/kernels/a64_gemm_s8_8x12/x1.cpp +++ b/src/core/NEON/kernels/arm_gemm/kernels/a64_gemm_s8_8x12/x1.cpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2017-2020 Arm Limited. + * Copyright (c) 2017-2020, 2024 Arm Limited. 
* * SPDX-License-Identifier: MIT * diff --git a/src/core/NEON/kernels/arm_gemm/kernels/a64_gemm_u16_8x12.hpp b/src/core/NEON/kernels/arm_gemm/kernels/a64_gemm_u16_8x12.hpp index e49ebbd84e..c7295275e3 100644 --- a/src/core/NEON/kernels/arm_gemm/kernels/a64_gemm_u16_8x12.hpp +++ b/src/core/NEON/kernels/arm_gemm/kernels/a64_gemm_u16_8x12.hpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2017-2020 Arm Limited. + * Copyright (c) 2017-2020, 2024 Arm Limited. * * SPDX-License-Identifier: MIT * diff --git a/src/core/NEON/kernels/arm_gemm/kernels/a64_gemm_u8_4x4.hpp b/src/core/NEON/kernels/arm_gemm/kernels/a64_gemm_u8_4x4.hpp index b747a1cf84..6d27dd73f1 100644 --- a/src/core/NEON/kernels/arm_gemm/kernels/a64_gemm_u8_4x4.hpp +++ b/src/core/NEON/kernels/arm_gemm/kernels/a64_gemm_u8_4x4.hpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2017-2022 Arm Limited. + * Copyright (c) 2017-2022, 2024 Arm Limited. * * SPDX-License-Identifier: MIT * diff --git a/src/core/NEON/kernels/arm_gemm/kernels/a64_gemm_u8_4x4/generic.cpp b/src/core/NEON/kernels/arm_gemm/kernels/a64_gemm_u8_4x4/generic.cpp index 073aeab7f6..495a81692d 100644 --- a/src/core/NEON/kernels/arm_gemm/kernels/a64_gemm_u8_4x4/generic.cpp +++ b/src/core/NEON/kernels/arm_gemm/kernels/a64_gemm_u8_4x4/generic.cpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2017 Arm Limited. + * Copyright (c) 2017, 2024 Arm Limited. * * SPDX-License-Identifier: MIT * diff --git a/src/core/NEON/kernels/arm_gemm/kernels/a64_gemm_u8_8x12.hpp b/src/core/NEON/kernels/arm_gemm/kernels/a64_gemm_u8_8x12.hpp index 6d333f3449..37f6dde3ab 100644 --- a/src/core/NEON/kernels/arm_gemm/kernels/a64_gemm_u8_8x12.hpp +++ b/src/core/NEON/kernels/arm_gemm/kernels/a64_gemm_u8_8x12.hpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2017-2018,2021 Arm Limited. + * Copyright (c) 2017-2018,2021, 2024 Arm Limited. 
* * SPDX-License-Identifier: MIT * diff --git a/src/core/NEON/kernels/arm_gemm/kernels/a64_gemm_u8_8x12/a55r1.cpp b/src/core/NEON/kernels/arm_gemm/kernels/a64_gemm_u8_8x12/a55r1.cpp index 63869c9fd4..7d433d1a26 100644 --- a/src/core/NEON/kernels/arm_gemm/kernels/a64_gemm_u8_8x12/a55r1.cpp +++ b/src/core/NEON/kernels/arm_gemm/kernels/a64_gemm_u8_8x12/a55r1.cpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2017-2018 Arm Limited. + * Copyright (c) 2017-2018, 2024 Arm Limited. * * SPDX-License-Identifier: MIT * diff --git a/src/core/NEON/kernels/arm_gemm/kernels/a64_gemm_u8_8x12/generic.cpp b/src/core/NEON/kernels/arm_gemm/kernels/a64_gemm_u8_8x12/generic.cpp index ff60cbc905..1c5e8472e6 100644 --- a/src/core/NEON/kernels/arm_gemm/kernels/a64_gemm_u8_8x12/generic.cpp +++ b/src/core/NEON/kernels/arm_gemm/kernels/a64_gemm_u8_8x12/generic.cpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2017-2018 Arm Limited. + * Copyright (c) 2017-2018, 2024 Arm Limited. * * SPDX-License-Identifier: MIT * diff --git a/src/core/NEON/kernels/arm_gemm/kernels/a64_gemm_u8_8x12/x1.cpp b/src/core/NEON/kernels/arm_gemm/kernels/a64_gemm_u8_8x12/x1.cpp index 1c1196b7a6..63581a6008 100644 --- a/src/core/NEON/kernels/arm_gemm/kernels/a64_gemm_u8_8x12/x1.cpp +++ b/src/core/NEON/kernels/arm_gemm/kernels/a64_gemm_u8_8x12/x1.cpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2017-2020 Arm Limited. + * Copyright (c) 2017-2020, 2024 Arm Limited. * * SPDX-License-Identifier: MIT * diff --git a/src/core/NEON/kernels/arm_gemm/kernels/a64_hgemm_8x24/a55r1.cpp b/src/core/NEON/kernels/arm_gemm/kernels/a64_hgemm_8x24/a55r1.cpp index e5728beba8..0686589f5c 100644 --- a/src/core/NEON/kernels/arm_gemm/kernels/a64_hgemm_8x24/a55r1.cpp +++ b/src/core/NEON/kernels/arm_gemm/kernels/a64_hgemm_8x24/a55r1.cpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2017-2018 Arm Limited. + * Copyright (c) 2017-2018, 2024 Arm Limited. 
* * SPDX-License-Identifier: MIT * diff --git a/src/core/NEON/kernels/arm_gemm/kernels/a64_hgemm_8x24/generic.cpp b/src/core/NEON/kernels/arm_gemm/kernels/a64_hgemm_8x24/generic.cpp index 23b87fa192..be92554dce 100644 --- a/src/core/NEON/kernels/arm_gemm/kernels/a64_hgemm_8x24/generic.cpp +++ b/src/core/NEON/kernels/arm_gemm/kernels/a64_hgemm_8x24/generic.cpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2017-2018 Arm Limited. + * Copyright (c) 2017-2018, 2024 Arm Limited. * * SPDX-License-Identifier: MIT * diff --git a/src/core/NEON/kernels/arm_gemm/kernels/a64_hgemm_8x24/x1.cpp b/src/core/NEON/kernels/arm_gemm/kernels/a64_hgemm_8x24/x1.cpp index b47fa6a2d7..9aa5a2a9cc 100644 --- a/src/core/NEON/kernels/arm_gemm/kernels/a64_hgemm_8x24/x1.cpp +++ b/src/core/NEON/kernels/arm_gemm/kernels/a64_hgemm_8x24/x1.cpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2017-2020 Arm Limited. + * Copyright (c) 2017-2020, 2024 Arm Limited. * * SPDX-License-Identifier: MIT * diff --git a/src/core/NEON/kernels/arm_gemm/kernels/a64_hybrid_bf16fp32_dot_6x16.hpp b/src/core/NEON/kernels/arm_gemm/kernels/a64_hybrid_bf16fp32_dot_6x16.hpp index f1427669ea..22a80885b3 100644 --- a/src/core/NEON/kernels/arm_gemm/kernels/a64_hybrid_bf16fp32_dot_6x16.hpp +++ b/src/core/NEON/kernels/arm_gemm/kernels/a64_hybrid_bf16fp32_dot_6x16.hpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2019-2021, 2023 Arm Limited. + * Copyright (c) 2019-2021, 2023-2024 Arm Limited. * * SPDX-License-Identifier: MIT * diff --git a/src/core/NEON/kernels/arm_gemm/kernels/a64_hybrid_bf16fp32_dot_6x16/generic.cpp b/src/core/NEON/kernels/arm_gemm/kernels/a64_hybrid_bf16fp32_dot_6x16/generic.cpp index fc323ea4fc..10f5dd4716 100644 --- a/src/core/NEON/kernels/arm_gemm/kernels/a64_hybrid_bf16fp32_dot_6x16/generic.cpp +++ b/src/core/NEON/kernels/arm_gemm/kernels/a64_hybrid_bf16fp32_dot_6x16/generic.cpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2019-2021, 2023 Arm Limited. + * Copyright (c) 2019-2021, 2023-2024 Arm Limited. 
* * SPDX-License-Identifier: MIT * diff --git a/src/core/NEON/kernels/arm_gemm/kernels/a64_hybrid_bf16fp32_mmla_6x16.hpp b/src/core/NEON/kernels/arm_gemm/kernels/a64_hybrid_bf16fp32_mmla_6x16.hpp index d9e7259fa2..e0a1ed62d9 100644 --- a/src/core/NEON/kernels/arm_gemm/kernels/a64_hybrid_bf16fp32_mmla_6x16.hpp +++ b/src/core/NEON/kernels/arm_gemm/kernels/a64_hybrid_bf16fp32_mmla_6x16.hpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2021, 2023 Arm Limited. + * Copyright (c) 2021, 2023-2024 Arm Limited. * * SPDX-License-Identifier: MIT * diff --git a/src/core/NEON/kernels/arm_gemm/kernels/a64_hybrid_bf16fp32_mmla_6x16/generic.cpp b/src/core/NEON/kernels/arm_gemm/kernels/a64_hybrid_bf16fp32_mmla_6x16/generic.cpp index f6389e27d1..a804d5c102 100644 --- a/src/core/NEON/kernels/arm_gemm/kernels/a64_hybrid_bf16fp32_mmla_6x16/generic.cpp +++ b/src/core/NEON/kernels/arm_gemm/kernels/a64_hybrid_bf16fp32_mmla_6x16/generic.cpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2021, 2023 Arm Limited. + * Copyright (c) 2021, 2023-2024 Arm Limited. * * SPDX-License-Identifier: MIT * diff --git a/src/core/NEON/kernels/arm_gemm/kernels/a64_hybrid_fp16_mla_6x32.hpp b/src/core/NEON/kernels/arm_gemm/kernels/a64_hybrid_fp16_mla_6x32.hpp index 8b80c25beb..d012e992e6 100644 --- a/src/core/NEON/kernels/arm_gemm/kernels/a64_hybrid_fp16_mla_6x32.hpp +++ b/src/core/NEON/kernels/arm_gemm/kernels/a64_hybrid_fp16_mla_6x32.hpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2019-2021, 2023 Arm Limited. + * Copyright (c) 2019-2021, 2023-2024 Arm Limited. * * SPDX-License-Identifier: MIT * diff --git a/src/core/NEON/kernels/arm_gemm/kernels/a64_hybrid_fp16_mla_6x32/a55.cpp b/src/core/NEON/kernels/arm_gemm/kernels/a64_hybrid_fp16_mla_6x32/a55.cpp index b049ed45f9..faf34f7fc0 100644 --- a/src/core/NEON/kernels/arm_gemm/kernels/a64_hybrid_fp16_mla_6x32/a55.cpp +++ b/src/core/NEON/kernels/arm_gemm/kernels/a64_hybrid_fp16_mla_6x32/a55.cpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2019-2021, 2023 Arm Limited. 
+ * Copyright (c) 2019-2021, 2023-2024 Arm Limited. * * SPDX-License-Identifier: MIT * diff --git a/src/core/NEON/kernels/arm_gemm/kernels/a64_hybrid_fp16_mla_6x32/generic.cpp b/src/core/NEON/kernels/arm_gemm/kernels/a64_hybrid_fp16_mla_6x32/generic.cpp index 8e5f600c83..978b344f1a 100644 --- a/src/core/NEON/kernels/arm_gemm/kernels/a64_hybrid_fp16_mla_6x32/generic.cpp +++ b/src/core/NEON/kernels/arm_gemm/kernels/a64_hybrid_fp16_mla_6x32/generic.cpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2019-2021, 2023 Arm Limited. + * Copyright (c) 2019-2021, 2023-2024 Arm Limited. * * SPDX-License-Identifier: MIT * diff --git a/src/core/NEON/kernels/arm_gemm/kernels/a64_hybrid_fp32_mla_4x24/a55.cpp b/src/core/NEON/kernels/arm_gemm/kernels/a64_hybrid_fp32_mla_4x24/a55.cpp index 9ceda8fd0c..52c4e1be65 100644 --- a/src/core/NEON/kernels/arm_gemm/kernels/a64_hybrid_fp32_mla_4x24/a55.cpp +++ b/src/core/NEON/kernels/arm_gemm/kernels/a64_hybrid_fp32_mla_4x24/a55.cpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2021, 2023 Arm Limited. + * Copyright (c) 2021, 2023-2024 Arm Limited. * * SPDX-License-Identifier: MIT * diff --git a/src/core/NEON/kernels/arm_gemm/kernels/a64_hybrid_fp32_mla_4x24/generic.cpp b/src/core/NEON/kernels/arm_gemm/kernels/a64_hybrid_fp32_mla_4x24/generic.cpp index dbd45460e8..f5863ba348 100644 --- a/src/core/NEON/kernels/arm_gemm/kernels/a64_hybrid_fp32_mla_4x24/generic.cpp +++ b/src/core/NEON/kernels/arm_gemm/kernels/a64_hybrid_fp32_mla_4x24/generic.cpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2021, 2023 Arm Limited. + * Copyright (c) 2021, 2023-2024 Arm Limited. 
* * SPDX-License-Identifier: MIT * diff --git a/src/core/NEON/kernels/arm_gemm/kernels/a64_hybrid_fp32_mla_6x16/a55.cpp b/src/core/NEON/kernels/arm_gemm/kernels/a64_hybrid_fp32_mla_6x16/a55.cpp index ddbc840829..3428028ac8 100644 --- a/src/core/NEON/kernels/arm_gemm/kernels/a64_hybrid_fp32_mla_6x16/a55.cpp +++ b/src/core/NEON/kernels/arm_gemm/kernels/a64_hybrid_fp32_mla_6x16/a55.cpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2019-2021, 2023 Arm Limited. + * Copyright (c) 2019-2021, 2023-2024 Arm Limited. * * SPDX-License-Identifier: MIT * diff --git a/src/core/NEON/kernels/arm_gemm/kernels/a64_hybrid_fp32_mla_6x16/generic.cpp b/src/core/NEON/kernels/arm_gemm/kernels/a64_hybrid_fp32_mla_6x16/generic.cpp index bb84a50282..6c51c0ff3c 100644 --- a/src/core/NEON/kernels/arm_gemm/kernels/a64_hybrid_fp32_mla_6x16/generic.cpp +++ b/src/core/NEON/kernels/arm_gemm/kernels/a64_hybrid_fp32_mla_6x16/generic.cpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2019-2021, 2023 Arm Limited. + * Copyright (c) 2019-2021, 2023-2024 Arm Limited. * * SPDX-License-Identifier: MIT * diff --git a/src/core/NEON/kernels/arm_gemm/kernels/a64_hybrid_fp32_mla_8x4.hpp b/src/core/NEON/kernels/arm_gemm/kernels/a64_hybrid_fp32_mla_8x4.hpp index 3ec02395d1..97b6c3f8e0 100644 --- a/src/core/NEON/kernels/arm_gemm/kernels/a64_hybrid_fp32_mla_8x4.hpp +++ b/src/core/NEON/kernels/arm_gemm/kernels/a64_hybrid_fp32_mla_8x4.hpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2019-2021, 2023 Arm Limited. + * Copyright (c) 2019-2021, 2023-2024 Arm Limited. * * SPDX-License-Identifier: MIT * diff --git a/src/core/NEON/kernels/arm_gemm/kernels/a64_hybrid_fp32_mla_8x4/a55.cpp b/src/core/NEON/kernels/arm_gemm/kernels/a64_hybrid_fp32_mla_8x4/a55.cpp index 236865315e..cf1d3070a8 100644 --- a/src/core/NEON/kernels/arm_gemm/kernels/a64_hybrid_fp32_mla_8x4/a55.cpp +++ b/src/core/NEON/kernels/arm_gemm/kernels/a64_hybrid_fp32_mla_8x4/a55.cpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2022-2023 Arm Limited. + * Copyright (c) 2022-2024 Arm Limited. 
* * SPDX-License-Identifier: MIT * diff --git a/src/core/NEON/kernels/arm_gemm/kernels/a64_hybrid_fp32_mla_8x4/generic.cpp b/src/core/NEON/kernels/arm_gemm/kernels/a64_hybrid_fp32_mla_8x4/generic.cpp index 004e5d7f23..36356dfb35 100644 --- a/src/core/NEON/kernels/arm_gemm/kernels/a64_hybrid_fp32_mla_8x4/generic.cpp +++ b/src/core/NEON/kernels/arm_gemm/kernels/a64_hybrid_fp32_mla_8x4/generic.cpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2019-2021, 2023 Arm Limited. + * Copyright (c) 2019-2021, 2023-2024 Arm Limited. * * SPDX-License-Identifier: MIT * diff --git a/src/core/NEON/kernels/arm_gemm/kernels/a64_hybrid_fp32bf16fp32_mmla_4x24.hpp b/src/core/NEON/kernels/arm_gemm/kernels/a64_hybrid_fp32bf16fp32_mmla_4x24.hpp index f31dd7afd0..e04820cc10 100644 --- a/src/core/NEON/kernels/arm_gemm/kernels/a64_hybrid_fp32bf16fp32_mmla_4x24.hpp +++ b/src/core/NEON/kernels/arm_gemm/kernels/a64_hybrid_fp32bf16fp32_mmla_4x24.hpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2021, 2023 Arm Limited. + * Copyright (c) 2021, 2023-2024 Arm Limited. * * SPDX-License-Identifier: MIT * diff --git a/src/core/NEON/kernels/arm_gemm/kernels/a64_hybrid_fp32bf16fp32_mmla_4x24/generic.cpp b/src/core/NEON/kernels/arm_gemm/kernels/a64_hybrid_fp32bf16fp32_mmla_4x24/generic.cpp index 0e468b196a..d36059d6a1 100644 --- a/src/core/NEON/kernels/arm_gemm/kernels/a64_hybrid_fp32bf16fp32_mmla_4x24/generic.cpp +++ b/src/core/NEON/kernels/arm_gemm/kernels/a64_hybrid_fp32bf16fp32_mmla_4x24/generic.cpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2021, 2023 Arm Limited. + * Copyright (c) 2021, 2023-2024 Arm Limited. 
* * SPDX-License-Identifier: MIT * diff --git a/src/core/NEON/kernels/arm_gemm/kernels/a64_hybrid_fp32bf16fp32_mmla_6x16.hpp b/src/core/NEON/kernels/arm_gemm/kernels/a64_hybrid_fp32bf16fp32_mmla_6x16.hpp index 71e16d68b5..191528d7f9 100644 --- a/src/core/NEON/kernels/arm_gemm/kernels/a64_hybrid_fp32bf16fp32_mmla_6x16.hpp +++ b/src/core/NEON/kernels/arm_gemm/kernels/a64_hybrid_fp32bf16fp32_mmla_6x16.hpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2021, 2023 Arm Limited. + * Copyright (c) 2021, 2023-2024 Arm Limited. * * SPDX-License-Identifier: MIT * diff --git a/src/core/NEON/kernels/arm_gemm/kernels/a64_hybrid_fp32bf16fp32_mmla_6x16/generic.cpp b/src/core/NEON/kernels/arm_gemm/kernels/a64_hybrid_fp32bf16fp32_mmla_6x16/generic.cpp index 5693c3f397..9dde5b0b92 100644 --- a/src/core/NEON/kernels/arm_gemm/kernels/a64_hybrid_fp32bf16fp32_mmla_6x16/generic.cpp +++ b/src/core/NEON/kernels/arm_gemm/kernels/a64_hybrid_fp32bf16fp32_mmla_6x16/generic.cpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2021, 2023 Arm Limited. + * Copyright (c) 2021, 2023-2024 Arm Limited. * * SPDX-License-Identifier: MIT * diff --git a/src/core/NEON/kernels/arm_gemm/kernels/a64_hybrid_s8qa_dot_4x16.hpp b/src/core/NEON/kernels/arm_gemm/kernels/a64_hybrid_s8qa_dot_4x16.hpp index bfc9c7e8f9..857de3c6d0 100644 --- a/src/core/NEON/kernels/arm_gemm/kernels/a64_hybrid_s8qa_dot_4x16.hpp +++ b/src/core/NEON/kernels/arm_gemm/kernels/a64_hybrid_s8qa_dot_4x16.hpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2019-2021, 2023 Arm Limited. + * Copyright (c) 2019-2021, 2023-2024 Arm Limited. * * SPDX-License-Identifier: MIT * diff --git a/src/core/NEON/kernels/arm_gemm/kernels/a64_hybrid_s8qa_dot_4x16/a55.cpp b/src/core/NEON/kernels/arm_gemm/kernels/a64_hybrid_s8qa_dot_4x16/a55.cpp index eac0e7167e..6ef2fd5d34 100644 --- a/src/core/NEON/kernels/arm_gemm/kernels/a64_hybrid_s8qa_dot_4x16/a55.cpp +++ b/src/core/NEON/kernels/arm_gemm/kernels/a64_hybrid_s8qa_dot_4x16/a55.cpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2019-2021, 2023 Arm Limited. 
+ * Copyright (c) 2019-2021, 2023-2024 Arm Limited. * * SPDX-License-Identifier: MIT * diff --git a/src/core/NEON/kernels/arm_gemm/kernels/a64_hybrid_s8qa_dot_4x16/generic.cpp b/src/core/NEON/kernels/arm_gemm/kernels/a64_hybrid_s8qa_dot_4x16/generic.cpp index 3b773a6827..0c3470ec16 100644 --- a/src/core/NEON/kernels/arm_gemm/kernels/a64_hybrid_s8qa_dot_4x16/generic.cpp +++ b/src/core/NEON/kernels/arm_gemm/kernels/a64_hybrid_s8qa_dot_4x16/generic.cpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2019-2021, 2023 Arm Limited. + * Copyright (c) 2019-2021, 2023-2024 Arm Limited. * * SPDX-License-Identifier: MIT * diff --git a/src/core/NEON/kernels/arm_gemm/kernels/a64_hybrid_s8qa_mmla_4x16.hpp b/src/core/NEON/kernels/arm_gemm/kernels/a64_hybrid_s8qa_mmla_4x16.hpp index 55ea68d1b5..7052d1cc41 100644 --- a/src/core/NEON/kernels/arm_gemm/kernels/a64_hybrid_s8qa_mmla_4x16.hpp +++ b/src/core/NEON/kernels/arm_gemm/kernels/a64_hybrid_s8qa_mmla_4x16.hpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2021, 2023 Arm Limited. + * Copyright (c) 2021, 2023-2024 Arm Limited. * * SPDX-License-Identifier: MIT * diff --git a/src/core/NEON/kernels/arm_gemm/kernels/a64_hybrid_s8qa_mmla_4x16/generic.cpp b/src/core/NEON/kernels/arm_gemm/kernels/a64_hybrid_s8qa_mmla_4x16/generic.cpp index 883bd5afdd..722f9af535 100644 --- a/src/core/NEON/kernels/arm_gemm/kernels/a64_hybrid_s8qa_mmla_4x16/generic.cpp +++ b/src/core/NEON/kernels/arm_gemm/kernels/a64_hybrid_s8qa_mmla_4x16/generic.cpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2021, 2023 Arm Limited. + * Copyright (c) 2021, 2023-2024 Arm Limited. 
* * SPDX-License-Identifier: MIT * diff --git a/src/core/NEON/kernels/arm_gemm/kernels/a64_hybrid_s8qs_dot_6x16.hpp b/src/core/NEON/kernels/arm_gemm/kernels/a64_hybrid_s8qs_dot_6x16.hpp index 2b7531d1e2..0ec35f7f76 100644 --- a/src/core/NEON/kernels/arm_gemm/kernels/a64_hybrid_s8qs_dot_6x16.hpp +++ b/src/core/NEON/kernels/arm_gemm/kernels/a64_hybrid_s8qs_dot_6x16.hpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2019-2021, 2023 Arm Limited. + * Copyright (c) 2019-2021, 2023-2024 Arm Limited. * * SPDX-License-Identifier: MIT * diff --git a/src/core/NEON/kernels/arm_gemm/kernels/a64_hybrid_s8qs_dot_6x16/a55.cpp b/src/core/NEON/kernels/arm_gemm/kernels/a64_hybrid_s8qs_dot_6x16/a55.cpp index 38a57b0741..f8f6579d8c 100644 --- a/src/core/NEON/kernels/arm_gemm/kernels/a64_hybrid_s8qs_dot_6x16/a55.cpp +++ b/src/core/NEON/kernels/arm_gemm/kernels/a64_hybrid_s8qs_dot_6x16/a55.cpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2019-2021, 2023 Arm Limited. + * Copyright (c) 2019-2021, 2023-2024 Arm Limited. * * SPDX-License-Identifier: MIT * diff --git a/src/core/NEON/kernels/arm_gemm/kernels/a64_hybrid_s8qs_dot_6x16/generic.cpp b/src/core/NEON/kernels/arm_gemm/kernels/a64_hybrid_s8qs_dot_6x16/generic.cpp index f3942328a6..6cdca85bd2 100644 --- a/src/core/NEON/kernels/arm_gemm/kernels/a64_hybrid_s8qs_dot_6x16/generic.cpp +++ b/src/core/NEON/kernels/arm_gemm/kernels/a64_hybrid_s8qs_dot_6x16/generic.cpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2019-2021, 2023 Arm Limited. + * Copyright (c) 2019-2021, 2023-2024 Arm Limited. * * SPDX-License-Identifier: MIT * diff --git a/src/core/NEON/kernels/arm_gemm/kernels/a64_hybrid_s8qs_mmla_6x16.hpp b/src/core/NEON/kernels/arm_gemm/kernels/a64_hybrid_s8qs_mmla_6x16.hpp index d0d5f1b80d..65f654012e 100644 --- a/src/core/NEON/kernels/arm_gemm/kernels/a64_hybrid_s8qs_mmla_6x16.hpp +++ b/src/core/NEON/kernels/arm_gemm/kernels/a64_hybrid_s8qs_mmla_6x16.hpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2021, 2023 Arm Limited. + * Copyright (c) 2021, 2023-2024 Arm Limited. 
* * SPDX-License-Identifier: MIT * diff --git a/src/core/NEON/kernels/arm_gemm/kernels/a64_hybrid_s8qs_mmla_6x16/generic.cpp b/src/core/NEON/kernels/arm_gemm/kernels/a64_hybrid_s8qs_mmla_6x16/generic.cpp index 0771829d37..5d34a5e9d2 100644 --- a/src/core/NEON/kernels/arm_gemm/kernels/a64_hybrid_s8qs_mmla_6x16/generic.cpp +++ b/src/core/NEON/kernels/arm_gemm/kernels/a64_hybrid_s8qs_mmla_6x16/generic.cpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2021, 2023 Arm Limited. + * Copyright (c) 2021, 2023-2024 Arm Limited. * * SPDX-License-Identifier: MIT * diff --git a/src/core/NEON/kernels/arm_gemm/kernels/a64_hybrid_s8s32_dot_6x16.hpp b/src/core/NEON/kernels/arm_gemm/kernels/a64_hybrid_s8s32_dot_6x16.hpp index a02fbe8f28..e13f2fb5eb 100644 --- a/src/core/NEON/kernels/arm_gemm/kernels/a64_hybrid_s8s32_dot_6x16.hpp +++ b/src/core/NEON/kernels/arm_gemm/kernels/a64_hybrid_s8s32_dot_6x16.hpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2019-2021, 2023 Arm Limited. + * Copyright (c) 2019-2021, 2023-2024 Arm Limited. * * SPDX-License-Identifier: MIT * diff --git a/src/core/NEON/kernels/arm_gemm/kernels/a64_hybrid_s8s32_dot_6x16/a55.cpp b/src/core/NEON/kernels/arm_gemm/kernels/a64_hybrid_s8s32_dot_6x16/a55.cpp index 289d38c3b6..f5545b4357 100644 --- a/src/core/NEON/kernels/arm_gemm/kernels/a64_hybrid_s8s32_dot_6x16/a55.cpp +++ b/src/core/NEON/kernels/arm_gemm/kernels/a64_hybrid_s8s32_dot_6x16/a55.cpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2019-2021, 2023 Arm Limited. + * Copyright (c) 2019-2021, 2023-2024 Arm Limited. * * SPDX-License-Identifier: MIT * diff --git a/src/core/NEON/kernels/arm_gemm/kernels/a64_hybrid_s8s32_dot_6x16/generic.cpp b/src/core/NEON/kernels/arm_gemm/kernels/a64_hybrid_s8s32_dot_6x16/generic.cpp index 452d647bb4..ba2f77f541 100644 --- a/src/core/NEON/kernels/arm_gemm/kernels/a64_hybrid_s8s32_dot_6x16/generic.cpp +++ b/src/core/NEON/kernels/arm_gemm/kernels/a64_hybrid_s8s32_dot_6x16/generic.cpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2019-2021, 2023 Arm Limited. 
+ * Copyright (c) 2019-2021, 2023-2024 Arm Limited. * * SPDX-License-Identifier: MIT * diff --git a/src/core/NEON/kernels/arm_gemm/kernels/a64_hybrid_s8s32_mmla_6x16.hpp b/src/core/NEON/kernels/arm_gemm/kernels/a64_hybrid_s8s32_mmla_6x16.hpp index 4905ba5656..0f7481f0da 100644 --- a/src/core/NEON/kernels/arm_gemm/kernels/a64_hybrid_s8s32_mmla_6x16.hpp +++ b/src/core/NEON/kernels/arm_gemm/kernels/a64_hybrid_s8s32_mmla_6x16.hpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2021, 2023 Arm Limited. + * Copyright (c) 2021, 2023-2024 Arm Limited. * * SPDX-License-Identifier: MIT * diff --git a/src/core/NEON/kernels/arm_gemm/kernels/a64_hybrid_s8s32_mmla_6x16/generic.cpp b/src/core/NEON/kernels/arm_gemm/kernels/a64_hybrid_s8s32_mmla_6x16/generic.cpp index f8a76b5244..98b4d9b997 100644 --- a/src/core/NEON/kernels/arm_gemm/kernels/a64_hybrid_s8s32_mmla_6x16/generic.cpp +++ b/src/core/NEON/kernels/arm_gemm/kernels/a64_hybrid_s8s32_mmla_6x16/generic.cpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2021, 2023 Arm Limited. + * Copyright (c) 2021, 2023-2024 Arm Limited. * * SPDX-License-Identifier: MIT * diff --git a/src/core/NEON/kernels/arm_gemm/kernels/a64_hybrid_u8qa_dot_4x16.hpp b/src/core/NEON/kernels/arm_gemm/kernels/a64_hybrid_u8qa_dot_4x16.hpp index 14aba00788..926408855d 100644 --- a/src/core/NEON/kernels/arm_gemm/kernels/a64_hybrid_u8qa_dot_4x16.hpp +++ b/src/core/NEON/kernels/arm_gemm/kernels/a64_hybrid_u8qa_dot_4x16.hpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2019-2021, 2023 Arm Limited. + * Copyright (c) 2019-2021, 2023-2024 Arm Limited. * * SPDX-License-Identifier: MIT * diff --git a/src/core/NEON/kernels/arm_gemm/kernels/a64_hybrid_u8qa_dot_4x16/a55.cpp b/src/core/NEON/kernels/arm_gemm/kernels/a64_hybrid_u8qa_dot_4x16/a55.cpp index 00d063b426..b2cec742c4 100644 --- a/src/core/NEON/kernels/arm_gemm/kernels/a64_hybrid_u8qa_dot_4x16/a55.cpp +++ b/src/core/NEON/kernels/arm_gemm/kernels/a64_hybrid_u8qa_dot_4x16/a55.cpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2019-2021, 2023 Arm Limited. 
+ * Copyright (c) 2019-2021, 2023-2024 Arm Limited. * * SPDX-License-Identifier: MIT * diff --git a/src/core/NEON/kernels/arm_gemm/kernels/a64_hybrid_u8qa_dot_4x16/generic.cpp b/src/core/NEON/kernels/arm_gemm/kernels/a64_hybrid_u8qa_dot_4x16/generic.cpp index ebe583b5d4..f07902a559 100644 --- a/src/core/NEON/kernels/arm_gemm/kernels/a64_hybrid_u8qa_dot_4x16/generic.cpp +++ b/src/core/NEON/kernels/arm_gemm/kernels/a64_hybrid_u8qa_dot_4x16/generic.cpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2019-2021, 2023 Arm Limited. + * Copyright (c) 2019-2021, 2023-2024 Arm Limited. * * SPDX-License-Identifier: MIT * diff --git a/src/core/NEON/kernels/arm_gemm/kernels/a64_hybrid_u8qa_mmla_4x16.hpp b/src/core/NEON/kernels/arm_gemm/kernels/a64_hybrid_u8qa_mmla_4x16.hpp index 17e7405a0a..f8c7f0a549 100644 --- a/src/core/NEON/kernels/arm_gemm/kernels/a64_hybrid_u8qa_mmla_4x16.hpp +++ b/src/core/NEON/kernels/arm_gemm/kernels/a64_hybrid_u8qa_mmla_4x16.hpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2021, 2023 Arm Limited. + * Copyright (c) 2021, 2023-2024 Arm Limited. * * SPDX-License-Identifier: MIT * diff --git a/src/core/NEON/kernels/arm_gemm/kernels/a64_hybrid_u8qa_mmla_4x16/generic.cpp b/src/core/NEON/kernels/arm_gemm/kernels/a64_hybrid_u8qa_mmla_4x16/generic.cpp index 1335b355ef..9103ef59ce 100644 --- a/src/core/NEON/kernels/arm_gemm/kernels/a64_hybrid_u8qa_mmla_4x16/generic.cpp +++ b/src/core/NEON/kernels/arm_gemm/kernels/a64_hybrid_u8qa_mmla_4x16/generic.cpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2021, 2023 Arm Limited. + * Copyright (c) 2021, 2023-2024 Arm Limited. 
* * SPDX-License-Identifier: MIT * diff --git a/src/core/NEON/kernels/arm_gemm/kernels/a64_hybrid_u8u32_dot_6x16.hpp b/src/core/NEON/kernels/arm_gemm/kernels/a64_hybrid_u8u32_dot_6x16.hpp index 38bb7c646d..16d241ff02 100644 --- a/src/core/NEON/kernels/arm_gemm/kernels/a64_hybrid_u8u32_dot_6x16.hpp +++ b/src/core/NEON/kernels/arm_gemm/kernels/a64_hybrid_u8u32_dot_6x16.hpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2019-2021, 2023 Arm Limited. + * Copyright (c) 2019-2021, 2023-2024 Arm Limited. * * SPDX-License-Identifier: MIT * diff --git a/src/core/NEON/kernels/arm_gemm/kernels/a64_hybrid_u8u32_dot_6x16/a55.cpp b/src/core/NEON/kernels/arm_gemm/kernels/a64_hybrid_u8u32_dot_6x16/a55.cpp index 7f0fad7fa7..f12269be58 100644 --- a/src/core/NEON/kernels/arm_gemm/kernels/a64_hybrid_u8u32_dot_6x16/a55.cpp +++ b/src/core/NEON/kernels/arm_gemm/kernels/a64_hybrid_u8u32_dot_6x16/a55.cpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2019-2021, 2023 Arm Limited. + * Copyright (c) 2019-2021, 2023-2024 Arm Limited. * * SPDX-License-Identifier: MIT * diff --git a/src/core/NEON/kernels/arm_gemm/kernels/a64_hybrid_u8u32_dot_6x16/generic.cpp b/src/core/NEON/kernels/arm_gemm/kernels/a64_hybrid_u8u32_dot_6x16/generic.cpp index 849c680843..d3367b959a 100644 --- a/src/core/NEON/kernels/arm_gemm/kernels/a64_hybrid_u8u32_dot_6x16/generic.cpp +++ b/src/core/NEON/kernels/arm_gemm/kernels/a64_hybrid_u8u32_dot_6x16/generic.cpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2019-2021, 2023 Arm Limited. + * Copyright (c) 2019-2021, 2023-2024 Arm Limited. * * SPDX-License-Identifier: MIT * diff --git a/src/core/NEON/kernels/arm_gemm/kernels/a64_hybrid_u8u32_mmla_6x16.hpp b/src/core/NEON/kernels/arm_gemm/kernels/a64_hybrid_u8u32_mmla_6x16.hpp index e360452108..09fba7e253 100644 --- a/src/core/NEON/kernels/arm_gemm/kernels/a64_hybrid_u8u32_mmla_6x16.hpp +++ b/src/core/NEON/kernels/arm_gemm/kernels/a64_hybrid_u8u32_mmla_6x16.hpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2021, 2023 Arm Limited. 
+ * Copyright (c) 2021, 2023-2024 Arm Limited. * * SPDX-License-Identifier: MIT * diff --git a/src/core/NEON/kernels/arm_gemm/kernels/a64_hybrid_u8u32_mmla_6x16/generic.cpp b/src/core/NEON/kernels/arm_gemm/kernels/a64_hybrid_u8u32_mmla_6x16/generic.cpp index 364f388e79..8c6fbd4c83 100644 --- a/src/core/NEON/kernels/arm_gemm/kernels/a64_hybrid_u8u32_mmla_6x16/generic.cpp +++ b/src/core/NEON/kernels/arm_gemm/kernels/a64_hybrid_u8u32_mmla_6x16/generic.cpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2021, 2023 Arm Limited. + * Copyright (c) 2021, 2023-2024 Arm Limited. * * SPDX-License-Identifier: MIT * diff --git a/src/core/NEON/kernels/arm_gemm/kernels/a64_interleaved_bf16fp32_dot_8x12.hpp b/src/core/NEON/kernels/arm_gemm/kernels/a64_interleaved_bf16fp32_dot_8x12.hpp index 25c5bf1b44..0a97c405ac 100644 --- a/src/core/NEON/kernels/arm_gemm/kernels/a64_interleaved_bf16fp32_dot_8x12.hpp +++ b/src/core/NEON/kernels/arm_gemm/kernels/a64_interleaved_bf16fp32_dot_8x12.hpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2019-2021, 2023 Arm Limited. + * Copyright (c) 2019-2021, 2023-2024 Arm Limited. * * SPDX-License-Identifier: MIT * diff --git a/src/core/NEON/kernels/arm_gemm/kernels/a64_interleaved_bf16fp32_dot_8x12/generic.cpp b/src/core/NEON/kernels/arm_gemm/kernels/a64_interleaved_bf16fp32_dot_8x12/generic.cpp index 5684f464b6..7ab854a3fe 100644 --- a/src/core/NEON/kernels/arm_gemm/kernels/a64_interleaved_bf16fp32_dot_8x12/generic.cpp +++ b/src/core/NEON/kernels/arm_gemm/kernels/a64_interleaved_bf16fp32_dot_8x12/generic.cpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2019-2021, 2023 Arm Limited. + * Copyright (c) 2019-2021, 2023-2024 Arm Limited. 
* * SPDX-License-Identifier: MIT * diff --git a/src/core/NEON/kernels/arm_gemm/kernels/a64_interleaved_bf16fp32_mmla_8x12.hpp b/src/core/NEON/kernels/arm_gemm/kernels/a64_interleaved_bf16fp32_mmla_8x12.hpp index 66c2b92a34..0a46f26c55 100644 --- a/src/core/NEON/kernels/arm_gemm/kernels/a64_interleaved_bf16fp32_mmla_8x12.hpp +++ b/src/core/NEON/kernels/arm_gemm/kernels/a64_interleaved_bf16fp32_mmla_8x12.hpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2019-2023 Arm Limited. + * Copyright (c) 2019-2024 Arm Limited. * * SPDX-License-Identifier: MIT * diff --git a/src/core/NEON/kernels/arm_gemm/kernels/a64_interleaved_bf16fp32_mmla_8x12/a510.cpp b/src/core/NEON/kernels/arm_gemm/kernels/a64_interleaved_bf16fp32_mmla_8x12/a510.cpp index bab687a9b4..6eaac71e5f 100644 --- a/src/core/NEON/kernels/arm_gemm/kernels/a64_interleaved_bf16fp32_mmla_8x12/a510.cpp +++ b/src/core/NEON/kernels/arm_gemm/kernels/a64_interleaved_bf16fp32_mmla_8x12/a510.cpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2021, 2023 Arm Limited. + * Copyright (c) 2021, 2023-2024 Arm Limited. * * SPDX-License-Identifier: MIT * diff --git a/src/core/NEON/kernels/arm_gemm/kernels/a64_interleaved_bf16fp32_mmla_8x12/generic.cpp b/src/core/NEON/kernels/arm_gemm/kernels/a64_interleaved_bf16fp32_mmla_8x12/generic.cpp index 8485820c7c..8360c9691b 100644 --- a/src/core/NEON/kernels/arm_gemm/kernels/a64_interleaved_bf16fp32_mmla_8x12/generic.cpp +++ b/src/core/NEON/kernels/arm_gemm/kernels/a64_interleaved_bf16fp32_mmla_8x12/generic.cpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2021, 2023 Arm Limited. + * Copyright (c) 2021, 2023-2024 Arm Limited. 
* * SPDX-License-Identifier: MIT * diff --git a/src/core/NEON/kernels/arm_gemm/kernels/a64_interleaved_s8s32_mmla_8x12.hpp b/src/core/NEON/kernels/arm_gemm/kernels/a64_interleaved_s8s32_mmla_8x12.hpp index 37a54fcfab..94b5bcc0a8 100644 --- a/src/core/NEON/kernels/arm_gemm/kernels/a64_interleaved_s8s32_mmla_8x12.hpp +++ b/src/core/NEON/kernels/arm_gemm/kernels/a64_interleaved_s8s32_mmla_8x12.hpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2019-2021, 2023 Arm Limited. + * Copyright (c) 2019-2021, 2023-2024 Arm Limited. * * SPDX-License-Identifier: MIT * diff --git a/src/core/NEON/kernels/arm_gemm/kernels/a64_interleaved_s8s32_mmla_8x12/a510.cpp b/src/core/NEON/kernels/arm_gemm/kernels/a64_interleaved_s8s32_mmla_8x12/a510.cpp index c1d37383df..2cd659d033 100644 --- a/src/core/NEON/kernels/arm_gemm/kernels/a64_interleaved_s8s32_mmla_8x12/a510.cpp +++ b/src/core/NEON/kernels/arm_gemm/kernels/a64_interleaved_s8s32_mmla_8x12/a510.cpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2021, 2023 Arm Limited. + * Copyright (c) 2021, 2023-2024 Arm Limited. * * SPDX-License-Identifier: MIT * diff --git a/src/core/NEON/kernels/arm_gemm/kernels/a64_interleaved_s8s32_mmla_8x12/generic.cpp b/src/core/NEON/kernels/arm_gemm/kernels/a64_interleaved_s8s32_mmla_8x12/generic.cpp index a097dc358a..a0ada9f949 100644 --- a/src/core/NEON/kernels/arm_gemm/kernels/a64_interleaved_s8s32_mmla_8x12/generic.cpp +++ b/src/core/NEON/kernels/arm_gemm/kernels/a64_interleaved_s8s32_mmla_8x12/generic.cpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2019-2021, 2023 Arm Limited. + * Copyright (c) 2019-2021, 2023-2024 Arm Limited. 
* * SPDX-License-Identifier: MIT * diff --git a/src/core/NEON/kernels/arm_gemm/kernels/a64_interleaved_u8u32_mmla_8x12.hpp b/src/core/NEON/kernels/arm_gemm/kernels/a64_interleaved_u8u32_mmla_8x12.hpp index 0088557b8d..1073d15f01 100644 --- a/src/core/NEON/kernels/arm_gemm/kernels/a64_interleaved_u8u32_mmla_8x12.hpp +++ b/src/core/NEON/kernels/arm_gemm/kernels/a64_interleaved_u8u32_mmla_8x12.hpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2019-2021, 2023 Arm Limited. + * Copyright (c) 2019-2021, 2023-2024 Arm Limited. * * SPDX-License-Identifier: MIT * diff --git a/src/core/NEON/kernels/arm_gemm/kernels/a64_interleaved_u8u32_mmla_8x12/a510.cpp b/src/core/NEON/kernels/arm_gemm/kernels/a64_interleaved_u8u32_mmla_8x12/a510.cpp index 54c51954c8..741fa6ac08 100644 --- a/src/core/NEON/kernels/arm_gemm/kernels/a64_interleaved_u8u32_mmla_8x12/a510.cpp +++ b/src/core/NEON/kernels/arm_gemm/kernels/a64_interleaved_u8u32_mmla_8x12/a510.cpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2021, 2023 Arm Limited. + * Copyright (c) 2021, 2023-2024 Arm Limited. * * SPDX-License-Identifier: MIT * diff --git a/src/core/NEON/kernels/arm_gemm/kernels/a64_interleaved_u8u32_mmla_8x12/generic.cpp b/src/core/NEON/kernels/arm_gemm/kernels/a64_interleaved_u8u32_mmla_8x12/generic.cpp index 30260b9c29..613c3f09e5 100644 --- a/src/core/NEON/kernels/arm_gemm/kernels/a64_interleaved_u8u32_mmla_8x12/generic.cpp +++ b/src/core/NEON/kernels/arm_gemm/kernels/a64_interleaved_u8u32_mmla_8x12/generic.cpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2019-2020, 2023 Arm Limited. + * Copyright (c) 2019-2020, 2023-2024 Arm Limited. * * SPDX-License-Identifier: MIT * diff --git a/src/core/NEON/kernels/arm_gemm/kernels/a64_sgemm_8x12/a53.cpp b/src/core/NEON/kernels/arm_gemm/kernels/a64_sgemm_8x12/a53.cpp index f4b6e7b70f..00ec904e51 100644 --- a/src/core/NEON/kernels/arm_gemm/kernels/a64_sgemm_8x12/a53.cpp +++ b/src/core/NEON/kernels/arm_gemm/kernels/a64_sgemm_8x12/a53.cpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2017-2018 Arm Limited. 
+ * Copyright (c) 2017-2018, 2024 Arm Limited. * * SPDX-License-Identifier: MIT * diff --git a/src/core/NEON/kernels/arm_gemm/kernels/a64_sgemm_8x12/a55.cpp b/src/core/NEON/kernels/arm_gemm/kernels/a64_sgemm_8x12/a55.cpp index 5f86da8ef3..de85605561 100644 --- a/src/core/NEON/kernels/arm_gemm/kernels/a64_sgemm_8x12/a55.cpp +++ b/src/core/NEON/kernels/arm_gemm/kernels/a64_sgemm_8x12/a55.cpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2017-2018 Arm Limited. + * Copyright (c) 2017-2018, 2024 Arm Limited. * * SPDX-License-Identifier: MIT * diff --git a/src/core/NEON/kernels/arm_gemm/kernels/a64_sgemm_8x12/a55r1.cpp b/src/core/NEON/kernels/arm_gemm/kernels/a64_sgemm_8x12/a55r1.cpp index 7709ad1be6..928b22a190 100644 --- a/src/core/NEON/kernels/arm_gemm/kernels/a64_sgemm_8x12/a55r1.cpp +++ b/src/core/NEON/kernels/arm_gemm/kernels/a64_sgemm_8x12/a55r1.cpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2017-2018 Arm Limited. + * Copyright (c) 2017-2018, 2024 Arm Limited. * * SPDX-License-Identifier: MIT * diff --git a/src/core/NEON/kernels/arm_gemm/kernels/a64_sgemm_8x12/generic.cpp b/src/core/NEON/kernels/arm_gemm/kernels/a64_sgemm_8x12/generic.cpp index dc72095a9b..711fc77d9f 100644 --- a/src/core/NEON/kernels/arm_gemm/kernels/a64_sgemm_8x12/generic.cpp +++ b/src/core/NEON/kernels/arm_gemm/kernels/a64_sgemm_8x12/generic.cpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2017-2018 Arm Limited. + * Copyright (c) 2017-2018, 2024 Arm Limited. * * SPDX-License-Identifier: MIT * diff --git a/src/core/NEON/kernels/arm_gemm/kernels/a64_sgemm_8x12/x1.cpp b/src/core/NEON/kernels/arm_gemm/kernels/a64_sgemm_8x12/x1.cpp index 89f8ac2d6c..a348b4b67e 100644 --- a/src/core/NEON/kernels/arm_gemm/kernels/a64_sgemm_8x12/x1.cpp +++ b/src/core/NEON/kernels/arm_gemm/kernels/a64_sgemm_8x12/x1.cpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2017-2020 Arm Limited. + * Copyright (c) 2017-2020, 2024 Arm Limited. 
* * SPDX-License-Identifier: MIT * diff --git a/src/core/NEON/kernels/arm_gemm/kernels/a64_sgemm_8x6.hpp b/src/core/NEON/kernels/arm_gemm/kernels/a64_sgemm_8x6.hpp index c1318a2a06..b35cc91a5d 100644 --- a/src/core/NEON/kernels/arm_gemm/kernels/a64_sgemm_8x6.hpp +++ b/src/core/NEON/kernels/arm_gemm/kernels/a64_sgemm_8x6.hpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2017-2021 Arm Limited. + * Copyright (c) 2017-2021, 2024 Arm Limited. * * SPDX-License-Identifier: MIT * diff --git a/src/core/NEON/kernels/arm_gemm/kernels/a64_sgemm_8x6/generic.cpp b/src/core/NEON/kernels/arm_gemm/kernels/a64_sgemm_8x6/generic.cpp index 9b81374d2d..a968105af1 100644 --- a/src/core/NEON/kernels/arm_gemm/kernels/a64_sgemm_8x6/generic.cpp +++ b/src/core/NEON/kernels/arm_gemm/kernels/a64_sgemm_8x6/generic.cpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2017-2021 Arm Limited. + * Copyright (c) 2017-2021, 2024 Arm Limited. * * SPDX-License-Identifier: MIT * diff --git a/src/core/NEON/kernels/arm_gemm/kernels/a64_sgemv_pretransposed/generic.cpp b/src/core/NEON/kernels/arm_gemm/kernels/a64_sgemv_pretransposed/generic.cpp index 0640cece0d..702b5f69ff 100644 --- a/src/core/NEON/kernels/arm_gemm/kernels/a64_sgemv_pretransposed/generic.cpp +++ b/src/core/NEON/kernels/arm_gemm/kernels/a64_sgemv_pretransposed/generic.cpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2017-2018 Arm Limited. + * Copyright (c) 2017-2018, 2024 Arm Limited. * * SPDX-License-Identifier: MIT * diff --git a/src/core/NEON/kernels/arm_gemm/kernels/a64_smallK_hybrid_fp32_mla_6x4.hpp b/src/core/NEON/kernels/arm_gemm/kernels/a64_smallK_hybrid_fp32_mla_6x4.hpp index 5f7252f019..d072470939 100644 --- a/src/core/NEON/kernels/arm_gemm/kernels/a64_smallK_hybrid_fp32_mla_6x4.hpp +++ b/src/core/NEON/kernels/arm_gemm/kernels/a64_smallK_hybrid_fp32_mla_6x4.hpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2019-2020 Arm Limited. + * Copyright (c) 2019-2020, 2024 Arm Limited. 
* * SPDX-License-Identifier: MIT * diff --git a/src/core/NEON/kernels/arm_gemm/kernels/a64_smallK_hybrid_fp32_mla_8x4.hpp b/src/core/NEON/kernels/arm_gemm/kernels/a64_smallK_hybrid_fp32_mla_8x4.hpp index a8e0c24eae..94312be08a 100644 --- a/src/core/NEON/kernels/arm_gemm/kernels/a64_smallK_hybrid_fp32_mla_8x4.hpp +++ b/src/core/NEON/kernels/arm_gemm/kernels/a64_smallK_hybrid_fp32_mla_8x4.hpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2019-2020 Arm Limited. + * Copyright (c) 2019-2020, 2024 Arm Limited. * * SPDX-License-Identifier: MIT * diff --git a/src/core/NEON/kernels/arm_gemm/kernels/a64_smallK_hybrid_s8s32_dot_6x4.hpp b/src/core/NEON/kernels/arm_gemm/kernels/a64_smallK_hybrid_s8s32_dot_6x4.hpp index abf0eda008..d244aecc70 100644 --- a/src/core/NEON/kernels/arm_gemm/kernels/a64_smallK_hybrid_s8s32_dot_6x4.hpp +++ b/src/core/NEON/kernels/arm_gemm/kernels/a64_smallK_hybrid_s8s32_dot_6x4.hpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2019-2020 Arm Limited. + * Copyright (c) 2019-2020, 2024 Arm Limited. * * SPDX-License-Identifier: MIT * diff --git a/src/core/NEON/kernels/arm_gemm/kernels/a64_smallK_hybrid_s8s32_dot_8x4.hpp b/src/core/NEON/kernels/arm_gemm/kernels/a64_smallK_hybrid_s8s32_dot_8x4.hpp index 9f9c2a49db..85583c46cf 100644 --- a/src/core/NEON/kernels/arm_gemm/kernels/a64_smallK_hybrid_s8s32_dot_8x4.hpp +++ b/src/core/NEON/kernels/arm_gemm/kernels/a64_smallK_hybrid_s8s32_dot_8x4.hpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2019-2020 Arm Limited. + * Copyright (c) 2019-2020, 2024 Arm Limited. * * SPDX-License-Identifier: MIT * diff --git a/src/core/NEON/kernels/arm_gemm/kernels/a64_smallK_hybrid_u8u32_dot_6x4.hpp b/src/core/NEON/kernels/arm_gemm/kernels/a64_smallK_hybrid_u8u32_dot_6x4.hpp index 5d48a52d42..c474b9c1f9 100644 --- a/src/core/NEON/kernels/arm_gemm/kernels/a64_smallK_hybrid_u8u32_dot_6x4.hpp +++ b/src/core/NEON/kernels/arm_gemm/kernels/a64_smallK_hybrid_u8u32_dot_6x4.hpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2019-2020 Arm Limited. 
+ * Copyright (c) 2019-2020, 2024 Arm Limited. * * SPDX-License-Identifier: MIT * diff --git a/src/core/NEON/kernels/arm_gemm/kernels/a64_smallK_hybrid_u8u32_dot_8x4.hpp b/src/core/NEON/kernels/arm_gemm/kernels/a64_smallK_hybrid_u8u32_dot_8x4.hpp index 942f94b0bf..65a2281638 100644 --- a/src/core/NEON/kernels/arm_gemm/kernels/a64_smallK_hybrid_u8u32_dot_8x4.hpp +++ b/src/core/NEON/kernels/arm_gemm/kernels/a64_smallK_hybrid_u8u32_dot_8x4.hpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2019-2020 Arm Limited. + * Copyright (c) 2019-2020, 2024 Arm Limited. * * SPDX-License-Identifier: MIT * diff --git a/src/core/NEON/kernels/arm_gemm/kernels/sme2_gemv_bf16fp32_dot_16VL/generic.cpp b/src/core/NEON/kernels/arm_gemm/kernels/sme2_gemv_bf16fp32_dot_16VL/generic.cpp index db29e42ef1..5c9a465817 100644 --- a/src/core/NEON/kernels/arm_gemm/kernels/sme2_gemv_bf16fp32_dot_16VL/generic.cpp +++ b/src/core/NEON/kernels/arm_gemm/kernels/sme2_gemv_bf16fp32_dot_16VL/generic.cpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2022-2023 Arm Limited. + * Copyright (c) 2022-2024 Arm Limited. * * SPDX-License-Identifier: MIT * diff --git a/src/core/NEON/kernels/arm_gemm/kernels/sme2_gemv_fp32_mla_16VL/generic.cpp b/src/core/NEON/kernels/arm_gemm/kernels/sme2_gemv_fp32_mla_16VL/generic.cpp index d2c260536d..1cce355583 100644 --- a/src/core/NEON/kernels/arm_gemm/kernels/sme2_gemv_fp32_mla_16VL/generic.cpp +++ b/src/core/NEON/kernels/arm_gemm/kernels/sme2_gemv_fp32_mla_16VL/generic.cpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2022-2023 Arm Limited. + * Copyright (c) 2022-2024 Arm Limited. 
* * SPDX-License-Identifier: MIT * diff --git a/src/core/NEON/kernels/arm_gemm/kernels/sme2_gemv_fp32bf16fp32_dot_16VL/generic.cpp b/src/core/NEON/kernels/arm_gemm/kernels/sme2_gemv_fp32bf16fp32_dot_16VL/generic.cpp index c6fa11016f..9747587495 100644 --- a/src/core/NEON/kernels/arm_gemm/kernels/sme2_gemv_fp32bf16fp32_dot_16VL/generic.cpp +++ b/src/core/NEON/kernels/arm_gemm/kernels/sme2_gemv_fp32bf16fp32_dot_16VL/generic.cpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2022-2023 Arm Limited. + * Copyright (c) 2022-2024 Arm Limited. * * SPDX-License-Identifier: MIT * diff --git a/src/core/NEON/kernels/arm_gemm/kernels/sme2_gemv_s8qa_dot_16VL/generic.cpp b/src/core/NEON/kernels/arm_gemm/kernels/sme2_gemv_s8qa_dot_16VL/generic.cpp index 86bd8aeb04..a1c441555e 100644 --- a/src/core/NEON/kernels/arm_gemm/kernels/sme2_gemv_s8qa_dot_16VL/generic.cpp +++ b/src/core/NEON/kernels/arm_gemm/kernels/sme2_gemv_s8qa_dot_16VL/generic.cpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2022-2023 Arm Limited. + * Copyright (c) 2022-2024 Arm Limited. * * SPDX-License-Identifier: MIT * diff --git a/src/core/NEON/kernels/arm_gemm/kernels/sme2_gemv_u8qa_dot_16VL/generic.cpp b/src/core/NEON/kernels/arm_gemm/kernels/sme2_gemv_u8qa_dot_16VL/generic.cpp index 093feee6ce..9bf699462a 100644 --- a/src/core/NEON/kernels/arm_gemm/kernels/sme2_gemv_u8qa_dot_16VL/generic.cpp +++ b/src/core/NEON/kernels/arm_gemm/kernels/sme2_gemv_u8qa_dot_16VL/generic.cpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2022-2023 Arm Limited. + * Copyright (c) 2022-2024 Arm Limited. 
* * SPDX-License-Identifier: MIT * diff --git a/src/core/NEON/kernels/arm_gemm/kernels/sme2_interleaved_nomerge_bf16fp32_mopa_1VLx4VL.hpp b/src/core/NEON/kernels/arm_gemm/kernels/sme2_interleaved_nomerge_bf16fp32_mopa_1VLx4VL.hpp index edfb362aab..fcce7a1424 100644 --- a/src/core/NEON/kernels/arm_gemm/kernels/sme2_interleaved_nomerge_bf16fp32_mopa_1VLx4VL.hpp +++ b/src/core/NEON/kernels/arm_gemm/kernels/sme2_interleaved_nomerge_bf16fp32_mopa_1VLx4VL.hpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2022-2023 Arm Limited. + * Copyright (c) 2022-2024 Arm Limited. * * SPDX-License-Identifier: MIT * diff --git a/src/core/NEON/kernels/arm_gemm/kernels/sme2_interleaved_nomerge_bf16fp32_mopa_1VLx4VL/generic.cpp b/src/core/NEON/kernels/arm_gemm/kernels/sme2_interleaved_nomerge_bf16fp32_mopa_1VLx4VL/generic.cpp index 8105300cb7..36114c5060 100644 --- a/src/core/NEON/kernels/arm_gemm/kernels/sme2_interleaved_nomerge_bf16fp32_mopa_1VLx4VL/generic.cpp +++ b/src/core/NEON/kernels/arm_gemm/kernels/sme2_interleaved_nomerge_bf16fp32_mopa_1VLx4VL/generic.cpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2022-2023 Arm Limited. + * Copyright (c) 2022-2024 Arm Limited. * * SPDX-License-Identifier: MIT * diff --git a/src/core/NEON/kernels/arm_gemm/kernels/sme2_interleaved_nomerge_bf16fp32_mopa_2VLx2VL.hpp b/src/core/NEON/kernels/arm_gemm/kernels/sme2_interleaved_nomerge_bf16fp32_mopa_2VLx2VL.hpp index ca7b0573fc..8b0f5b013f 100644 --- a/src/core/NEON/kernels/arm_gemm/kernels/sme2_interleaved_nomerge_bf16fp32_mopa_2VLx2VL.hpp +++ b/src/core/NEON/kernels/arm_gemm/kernels/sme2_interleaved_nomerge_bf16fp32_mopa_2VLx2VL.hpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2022-2023 Arm Limited. + * Copyright (c) 2022-2024 Arm Limited. 
* * SPDX-License-Identifier: MIT * diff --git a/src/core/NEON/kernels/arm_gemm/kernels/sme2_interleaved_nomerge_bf16fp32_mopa_2VLx2VL/generic.cpp b/src/core/NEON/kernels/arm_gemm/kernels/sme2_interleaved_nomerge_bf16fp32_mopa_2VLx2VL/generic.cpp index 20c1de9418..cc44c9a537 100644 --- a/src/core/NEON/kernels/arm_gemm/kernels/sme2_interleaved_nomerge_bf16fp32_mopa_2VLx2VL/generic.cpp +++ b/src/core/NEON/kernels/arm_gemm/kernels/sme2_interleaved_nomerge_bf16fp32_mopa_2VLx2VL/generic.cpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2022-2023 Arm Limited. + * Copyright (c) 2022-2024 Arm Limited. * * SPDX-License-Identifier: MIT * diff --git a/src/core/NEON/kernels/arm_gemm/kernels/sme2_interleaved_nomerge_bf16fp32_mopa_4VLx1VL.hpp b/src/core/NEON/kernels/arm_gemm/kernels/sme2_interleaved_nomerge_bf16fp32_mopa_4VLx1VL.hpp index 7b31d6d2db..f8812a1a71 100644 --- a/src/core/NEON/kernels/arm_gemm/kernels/sme2_interleaved_nomerge_bf16fp32_mopa_4VLx1VL.hpp +++ b/src/core/NEON/kernels/arm_gemm/kernels/sme2_interleaved_nomerge_bf16fp32_mopa_4VLx1VL.hpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2022-2023 Arm Limited. + * Copyright (c) 2022-2024 Arm Limited. * * SPDX-License-Identifier: MIT * diff --git a/src/core/NEON/kernels/arm_gemm/kernels/sme2_interleaved_nomerge_bf16fp32_mopa_4VLx1VL/generic.cpp b/src/core/NEON/kernels/arm_gemm/kernels/sme2_interleaved_nomerge_bf16fp32_mopa_4VLx1VL/generic.cpp index 70c94d32a3..a63cadc63a 100644 --- a/src/core/NEON/kernels/arm_gemm/kernels/sme2_interleaved_nomerge_bf16fp32_mopa_4VLx1VL/generic.cpp +++ b/src/core/NEON/kernels/arm_gemm/kernels/sme2_interleaved_nomerge_bf16fp32_mopa_4VLx1VL/generic.cpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2022-2023 Arm Limited. + * Copyright (c) 2022-2024 Arm Limited. 
* * SPDX-License-Identifier: MIT * diff --git a/src/core/NEON/kernels/arm_gemm/kernels/sme2_interleaved_nomerge_fp16fp32fp16_mopa_1VLx4VL.hpp b/src/core/NEON/kernels/arm_gemm/kernels/sme2_interleaved_nomerge_fp16fp32fp16_mopa_1VLx4VL.hpp index a9196958c7..c7fbede54e 100644 --- a/src/core/NEON/kernels/arm_gemm/kernels/sme2_interleaved_nomerge_fp16fp32fp16_mopa_1VLx4VL.hpp +++ b/src/core/NEON/kernels/arm_gemm/kernels/sme2_interleaved_nomerge_fp16fp32fp16_mopa_1VLx4VL.hpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2023 Arm Limited. + * Copyright (c) 2023-2024 Arm Limited. * * SPDX-License-Identifier: MIT * diff --git a/src/core/NEON/kernels/arm_gemm/kernels/sme2_interleaved_nomerge_fp16fp32fp16_mopa_1VLx4VL/generic.cpp b/src/core/NEON/kernels/arm_gemm/kernels/sme2_interleaved_nomerge_fp16fp32fp16_mopa_1VLx4VL/generic.cpp index ad10ce7993..871b154ad1 100644 --- a/src/core/NEON/kernels/arm_gemm/kernels/sme2_interleaved_nomerge_fp16fp32fp16_mopa_1VLx4VL/generic.cpp +++ b/src/core/NEON/kernels/arm_gemm/kernels/sme2_interleaved_nomerge_fp16fp32fp16_mopa_1VLx4VL/generic.cpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2023 Arm Limited. + * Copyright (c) 2023-2024 Arm Limited. * * SPDX-License-Identifier: MIT * @@ -10,16 +10,16 @@ * sell copies of the Software, and to permit persons to whom the Software is * furnished to do so, subject to the following conditions: * - * The above copyright notice and this permission notice shall be included in - * all copies or substantial portions of the Software. + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software. * * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 
IN NO EVENT SHALL THE * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING - * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS - * IN THE SOFTWARE. + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. */ #ifdef __ARM_FEATURE_SVE diff --git a/src/core/NEON/kernels/arm_gemm/kernels/sme2_interleaved_nomerge_fp16fp32fp16_mopa_2VLx2VL.hpp b/src/core/NEON/kernels/arm_gemm/kernels/sme2_interleaved_nomerge_fp16fp32fp16_mopa_2VLx2VL.hpp index 5bd34b2ca0..929b0c97ef 100644 --- a/src/core/NEON/kernels/arm_gemm/kernels/sme2_interleaved_nomerge_fp16fp32fp16_mopa_2VLx2VL.hpp +++ b/src/core/NEON/kernels/arm_gemm/kernels/sme2_interleaved_nomerge_fp16fp32fp16_mopa_2VLx2VL.hpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2023 Arm Limited. + * Copyright (c) 2023-2024 Arm Limited. * * SPDX-License-Identifier: MIT * diff --git a/src/core/NEON/kernels/arm_gemm/kernels/sme2_interleaved_nomerge_fp16fp32fp16_mopa_2VLx2VL/generic.cpp b/src/core/NEON/kernels/arm_gemm/kernels/sme2_interleaved_nomerge_fp16fp32fp16_mopa_2VLx2VL/generic.cpp index 5c48f953e8..969fb41a92 100644 --- a/src/core/NEON/kernels/arm_gemm/kernels/sme2_interleaved_nomerge_fp16fp32fp16_mopa_2VLx2VL/generic.cpp +++ b/src/core/NEON/kernels/arm_gemm/kernels/sme2_interleaved_nomerge_fp16fp32fp16_mopa_2VLx2VL/generic.cpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2023 Arm Limited. + * Copyright (c) 2023-2024 Arm Limited. * * SPDX-License-Identifier: MIT * @@ -10,16 +10,16 @@ * sell copies of the Software, and to permit persons to whom the Software is * furnished to do so, subject to the following conditions: * - * The above copyright notice and this permission notice shall be included in - * all copies or substantial portions of the Software. 
+ * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software. * * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING - * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS - * IN THE SOFTWARE. + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. */ #ifdef __ARM_FEATURE_SVE diff --git a/src/core/NEON/kernels/arm_gemm/kernels/sme2_interleaved_nomerge_fp16fp32fp16_mopa_4VLx1VL.hpp b/src/core/NEON/kernels/arm_gemm/kernels/sme2_interleaved_nomerge_fp16fp32fp16_mopa_4VLx1VL.hpp index 05029f04b0..dba440632d 100644 --- a/src/core/NEON/kernels/arm_gemm/kernels/sme2_interleaved_nomerge_fp16fp32fp16_mopa_4VLx1VL.hpp +++ b/src/core/NEON/kernels/arm_gemm/kernels/sme2_interleaved_nomerge_fp16fp32fp16_mopa_4VLx1VL.hpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2023 Arm Limited. + * Copyright (c) 2023-2024 Arm Limited. * * SPDX-License-Identifier: MIT * diff --git a/src/core/NEON/kernels/arm_gemm/kernels/sme2_interleaved_nomerge_fp16fp32fp16_mopa_4VLx1VL/generic.cpp b/src/core/NEON/kernels/arm_gemm/kernels/sme2_interleaved_nomerge_fp16fp32fp16_mopa_4VLx1VL/generic.cpp index 8728cff31d..ee66b4b95c 100644 --- a/src/core/NEON/kernels/arm_gemm/kernels/sme2_interleaved_nomerge_fp16fp32fp16_mopa_4VLx1VL/generic.cpp +++ b/src/core/NEON/kernels/arm_gemm/kernels/sme2_interleaved_nomerge_fp16fp32fp16_mopa_4VLx1VL/generic.cpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2023 Arm Limited. + * Copyright (c) 2023-2024 Arm Limited. 
* * SPDX-License-Identifier: MIT * @@ -10,16 +10,16 @@ * sell copies of the Software, and to permit persons to whom the Software is * furnished to do so, subject to the following conditions: * - * The above copyright notice and this permission notice shall be included in - * all copies or substantial portions of the Software. + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software. * * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING - * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS - * IN THE SOFTWARE. + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. */ #ifdef __ARM_FEATURE_SVE diff --git a/src/core/NEON/kernels/arm_gemm/kernels/sme2_interleaved_nomerge_fp32_mopa_1VLx4VL.hpp b/src/core/NEON/kernels/arm_gemm/kernels/sme2_interleaved_nomerge_fp32_mopa_1VLx4VL.hpp index bf3de2118e..9486319cfb 100644 --- a/src/core/NEON/kernels/arm_gemm/kernels/sme2_interleaved_nomerge_fp32_mopa_1VLx4VL.hpp +++ b/src/core/NEON/kernels/arm_gemm/kernels/sme2_interleaved_nomerge_fp32_mopa_1VLx4VL.hpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2022-2023 Arm Limited. + * Copyright (c) 2022-2024 Arm Limited. 
* * SPDX-License-Identifier: MIT * diff --git a/src/core/NEON/kernels/arm_gemm/kernels/sme2_interleaved_nomerge_fp32_mopa_1VLx4VL/generic.cpp b/src/core/NEON/kernels/arm_gemm/kernels/sme2_interleaved_nomerge_fp32_mopa_1VLx4VL/generic.cpp index 97be758bd6..19e5d52b53 100644 --- a/src/core/NEON/kernels/arm_gemm/kernels/sme2_interleaved_nomerge_fp32_mopa_1VLx4VL/generic.cpp +++ b/src/core/NEON/kernels/arm_gemm/kernels/sme2_interleaved_nomerge_fp32_mopa_1VLx4VL/generic.cpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2022-2023 Arm Limited. + * Copyright (c) 2022-2024 Arm Limited. * * SPDX-License-Identifier: MIT * diff --git a/src/core/NEON/kernels/arm_gemm/kernels/sme2_interleaved_nomerge_fp32_mopa_2VLx2VL.hpp b/src/core/NEON/kernels/arm_gemm/kernels/sme2_interleaved_nomerge_fp32_mopa_2VLx2VL.hpp index 9bc1f83100..ed54e70e28 100644 --- a/src/core/NEON/kernels/arm_gemm/kernels/sme2_interleaved_nomerge_fp32_mopa_2VLx2VL.hpp +++ b/src/core/NEON/kernels/arm_gemm/kernels/sme2_interleaved_nomerge_fp32_mopa_2VLx2VL.hpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2022-2023 Arm Limited. + * Copyright (c) 2022-2024 Arm Limited. * * SPDX-License-Identifier: MIT * diff --git a/src/core/NEON/kernels/arm_gemm/kernels/sme2_interleaved_nomerge_fp32_mopa_2VLx2VL/generic.cpp b/src/core/NEON/kernels/arm_gemm/kernels/sme2_interleaved_nomerge_fp32_mopa_2VLx2VL/generic.cpp index 3c475044e2..1e46aee27a 100644 --- a/src/core/NEON/kernels/arm_gemm/kernels/sme2_interleaved_nomerge_fp32_mopa_2VLx2VL/generic.cpp +++ b/src/core/NEON/kernels/arm_gemm/kernels/sme2_interleaved_nomerge_fp32_mopa_2VLx2VL/generic.cpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2022-2023 Arm Limited. + * Copyright (c) 2022-2024 Arm Limited. 
* * SPDX-License-Identifier: MIT * diff --git a/src/core/NEON/kernels/arm_gemm/kernels/sme2_interleaved_nomerge_fp32_mopa_4VLx1VL.hpp b/src/core/NEON/kernels/arm_gemm/kernels/sme2_interleaved_nomerge_fp32_mopa_4VLx1VL.hpp index 165e25dd8f..1348f00d37 100644 --- a/src/core/NEON/kernels/arm_gemm/kernels/sme2_interleaved_nomerge_fp32_mopa_4VLx1VL.hpp +++ b/src/core/NEON/kernels/arm_gemm/kernels/sme2_interleaved_nomerge_fp32_mopa_4VLx1VL.hpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2022-2023 Arm Limited. + * Copyright (c) 2022-2024 Arm Limited. * * SPDX-License-Identifier: MIT * diff --git a/src/core/NEON/kernels/arm_gemm/kernels/sme2_interleaved_nomerge_fp32_mopa_4VLx1VL/generic.cpp b/src/core/NEON/kernels/arm_gemm/kernels/sme2_interleaved_nomerge_fp32_mopa_4VLx1VL/generic.cpp index ae1f812442..a69e1f84e0 100644 --- a/src/core/NEON/kernels/arm_gemm/kernels/sme2_interleaved_nomerge_fp32_mopa_4VLx1VL/generic.cpp +++ b/src/core/NEON/kernels/arm_gemm/kernels/sme2_interleaved_nomerge_fp32_mopa_4VLx1VL/generic.cpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2022-2023 Arm Limited. + * Copyright (c) 2022-2024 Arm Limited. * * SPDX-License-Identifier: MIT * diff --git a/src/core/NEON/kernels/arm_gemm/kernels/sme2_interleaved_nomerge_s8q_mopa_1VLx4VL.hpp b/src/core/NEON/kernels/arm_gemm/kernels/sme2_interleaved_nomerge_s8q_mopa_1VLx4VL.hpp index 7b3cc77867..a4b14325f2 100644 --- a/src/core/NEON/kernels/arm_gemm/kernels/sme2_interleaved_nomerge_s8q_mopa_1VLx4VL.hpp +++ b/src/core/NEON/kernels/arm_gemm/kernels/sme2_interleaved_nomerge_s8q_mopa_1VLx4VL.hpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2022-2023 Arm Limited. + * Copyright (c) 2022-2024 Arm Limited. 
* * SPDX-License-Identifier: MIT * diff --git a/src/core/NEON/kernels/arm_gemm/kernels/sme2_interleaved_nomerge_s8q_mopa_1VLx4VL/generic.cpp b/src/core/NEON/kernels/arm_gemm/kernels/sme2_interleaved_nomerge_s8q_mopa_1VLx4VL/generic.cpp index aba677b158..c8d56dc5e5 100644 --- a/src/core/NEON/kernels/arm_gemm/kernels/sme2_interleaved_nomerge_s8q_mopa_1VLx4VL/generic.cpp +++ b/src/core/NEON/kernels/arm_gemm/kernels/sme2_interleaved_nomerge_s8q_mopa_1VLx4VL/generic.cpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2022-2023 Arm Limited. + * Copyright (c) 2022-2024 Arm Limited. * * SPDX-License-Identifier: MIT * diff --git a/src/core/NEON/kernels/arm_gemm/kernels/sme2_interleaved_nomerge_s8q_mopa_2VLx2VL.hpp b/src/core/NEON/kernels/arm_gemm/kernels/sme2_interleaved_nomerge_s8q_mopa_2VLx2VL.hpp index 79990f72e5..b897efe0dc 100644 --- a/src/core/NEON/kernels/arm_gemm/kernels/sme2_interleaved_nomerge_s8q_mopa_2VLx2VL.hpp +++ b/src/core/NEON/kernels/arm_gemm/kernels/sme2_interleaved_nomerge_s8q_mopa_2VLx2VL.hpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2022-2023 Arm Limited. + * Copyright (c) 2022-2024 Arm Limited. * * SPDX-License-Identifier: MIT * diff --git a/src/core/NEON/kernels/arm_gemm/kernels/sme2_interleaved_nomerge_s8q_mopa_2VLx2VL/generic.cpp b/src/core/NEON/kernels/arm_gemm/kernels/sme2_interleaved_nomerge_s8q_mopa_2VLx2VL/generic.cpp index 7033de5fe3..b60573898a 100644 --- a/src/core/NEON/kernels/arm_gemm/kernels/sme2_interleaved_nomerge_s8q_mopa_2VLx2VL/generic.cpp +++ b/src/core/NEON/kernels/arm_gemm/kernels/sme2_interleaved_nomerge_s8q_mopa_2VLx2VL/generic.cpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2022-2023 Arm Limited. + * Copyright (c) 2022-2024 Arm Limited. 
* * SPDX-License-Identifier: MIT * diff --git a/src/core/NEON/kernels/arm_gemm/kernels/sme2_interleaved_nomerge_s8q_mopa_4VLx1VL.hpp b/src/core/NEON/kernels/arm_gemm/kernels/sme2_interleaved_nomerge_s8q_mopa_4VLx1VL.hpp index ef39cbbb28..5e22847853 100644 --- a/src/core/NEON/kernels/arm_gemm/kernels/sme2_interleaved_nomerge_s8q_mopa_4VLx1VL.hpp +++ b/src/core/NEON/kernels/arm_gemm/kernels/sme2_interleaved_nomerge_s8q_mopa_4VLx1VL.hpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2022-2023 Arm Limited. + * Copyright (c) 2022-2024 Arm Limited. * * SPDX-License-Identifier: MIT * diff --git a/src/core/NEON/kernels/arm_gemm/kernels/sme2_interleaved_nomerge_s8q_mopa_4VLx1VL/generic.cpp b/src/core/NEON/kernels/arm_gemm/kernels/sme2_interleaved_nomerge_s8q_mopa_4VLx1VL/generic.cpp index 4601f05501..7b8d34d350 100644 --- a/src/core/NEON/kernels/arm_gemm/kernels/sme2_interleaved_nomerge_s8q_mopa_4VLx1VL/generic.cpp +++ b/src/core/NEON/kernels/arm_gemm/kernels/sme2_interleaved_nomerge_s8q_mopa_4VLx1VL/generic.cpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2022-2023 Arm Limited. + * Copyright (c) 2022-2024 Arm Limited. * * SPDX-License-Identifier: MIT * diff --git a/src/core/NEON/kernels/arm_gemm/kernels/sme2_interleaved_nomerge_s8s32_mopa_1VLx4VL.hpp b/src/core/NEON/kernels/arm_gemm/kernels/sme2_interleaved_nomerge_s8s32_mopa_1VLx4VL.hpp index b9d8b60c8d..84386009a0 100644 --- a/src/core/NEON/kernels/arm_gemm/kernels/sme2_interleaved_nomerge_s8s32_mopa_1VLx4VL.hpp +++ b/src/core/NEON/kernels/arm_gemm/kernels/sme2_interleaved_nomerge_s8s32_mopa_1VLx4VL.hpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2022-2023 Arm Limited. + * Copyright (c) 2022-2024 Arm Limited. 
* * SPDX-License-Identifier: MIT * diff --git a/src/core/NEON/kernels/arm_gemm/kernels/sme2_interleaved_nomerge_s8s32_mopa_1VLx4VL/generic.cpp b/src/core/NEON/kernels/arm_gemm/kernels/sme2_interleaved_nomerge_s8s32_mopa_1VLx4VL/generic.cpp index d11faa634d..67c759410a 100644 --- a/src/core/NEON/kernels/arm_gemm/kernels/sme2_interleaved_nomerge_s8s32_mopa_1VLx4VL/generic.cpp +++ b/src/core/NEON/kernels/arm_gemm/kernels/sme2_interleaved_nomerge_s8s32_mopa_1VLx4VL/generic.cpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2022-2023 Arm Limited. + * Copyright (c) 2022-2024 Arm Limited. * * SPDX-License-Identifier: MIT * diff --git a/src/core/NEON/kernels/arm_gemm/kernels/sme2_interleaved_nomerge_s8s32_mopa_2VLx2VL.hpp b/src/core/NEON/kernels/arm_gemm/kernels/sme2_interleaved_nomerge_s8s32_mopa_2VLx2VL.hpp index f05d2cf215..2899d7553c 100644 --- a/src/core/NEON/kernels/arm_gemm/kernels/sme2_interleaved_nomerge_s8s32_mopa_2VLx2VL.hpp +++ b/src/core/NEON/kernels/arm_gemm/kernels/sme2_interleaved_nomerge_s8s32_mopa_2VLx2VL.hpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2022-2023 Arm Limited. + * Copyright (c) 2022-2024 Arm Limited. * * SPDX-License-Identifier: MIT * diff --git a/src/core/NEON/kernels/arm_gemm/kernels/sme2_interleaved_nomerge_s8s32_mopa_2VLx2VL/generic.cpp b/src/core/NEON/kernels/arm_gemm/kernels/sme2_interleaved_nomerge_s8s32_mopa_2VLx2VL/generic.cpp index 47de894306..7f44e5ffe5 100644 --- a/src/core/NEON/kernels/arm_gemm/kernels/sme2_interleaved_nomerge_s8s32_mopa_2VLx2VL/generic.cpp +++ b/src/core/NEON/kernels/arm_gemm/kernels/sme2_interleaved_nomerge_s8s32_mopa_2VLx2VL/generic.cpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2022-2023 Arm Limited. + * Copyright (c) 2022-2024 Arm Limited. 
* * SPDX-License-Identifier: MIT * diff --git a/src/core/NEON/kernels/arm_gemm/kernels/sme2_interleaved_nomerge_s8s32_mopa_4VLx1VL.hpp b/src/core/NEON/kernels/arm_gemm/kernels/sme2_interleaved_nomerge_s8s32_mopa_4VLx1VL.hpp index ce10ab30e7..0c29ab9991 100644 --- a/src/core/NEON/kernels/arm_gemm/kernels/sme2_interleaved_nomerge_s8s32_mopa_4VLx1VL.hpp +++ b/src/core/NEON/kernels/arm_gemm/kernels/sme2_interleaved_nomerge_s8s32_mopa_4VLx1VL.hpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2022-2023 Arm Limited. + * Copyright (c) 2022-2024 Arm Limited. * * SPDX-License-Identifier: MIT * diff --git a/src/core/NEON/kernels/arm_gemm/kernels/sme2_interleaved_nomerge_s8s32_mopa_4VLx1VL/generic.cpp b/src/core/NEON/kernels/arm_gemm/kernels/sme2_interleaved_nomerge_s8s32_mopa_4VLx1VL/generic.cpp index a23c44b7da..932bd6b595 100644 --- a/src/core/NEON/kernels/arm_gemm/kernels/sme2_interleaved_nomerge_s8s32_mopa_4VLx1VL/generic.cpp +++ b/src/core/NEON/kernels/arm_gemm/kernels/sme2_interleaved_nomerge_s8s32_mopa_4VLx1VL/generic.cpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2022-2023 Arm Limited. + * Copyright (c) 2022-2024 Arm Limited. * * SPDX-License-Identifier: MIT * diff --git a/src/core/NEON/kernels/arm_gemm/kernels/sme2_interleaved_nomerge_u8q_mopa_1VLx4VL.hpp b/src/core/NEON/kernels/arm_gemm/kernels/sme2_interleaved_nomerge_u8q_mopa_1VLx4VL.hpp index fb84883913..f540d3fa24 100644 --- a/src/core/NEON/kernels/arm_gemm/kernels/sme2_interleaved_nomerge_u8q_mopa_1VLx4VL.hpp +++ b/src/core/NEON/kernels/arm_gemm/kernels/sme2_interleaved_nomerge_u8q_mopa_1VLx4VL.hpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2022-2023 Arm Limited. + * Copyright (c) 2022-2024 Arm Limited. 
* * SPDX-License-Identifier: MIT * diff --git a/src/core/NEON/kernels/arm_gemm/kernels/sme2_interleaved_nomerge_u8q_mopa_1VLx4VL/generic.cpp b/src/core/NEON/kernels/arm_gemm/kernels/sme2_interleaved_nomerge_u8q_mopa_1VLx4VL/generic.cpp index 96247d2db5..0a468e0ff7 100644 --- a/src/core/NEON/kernels/arm_gemm/kernels/sme2_interleaved_nomerge_u8q_mopa_1VLx4VL/generic.cpp +++ b/src/core/NEON/kernels/arm_gemm/kernels/sme2_interleaved_nomerge_u8q_mopa_1VLx4VL/generic.cpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2022-2023 Arm Limited. + * Copyright (c) 2022-2024 Arm Limited. * * SPDX-License-Identifier: MIT * diff --git a/src/core/NEON/kernels/arm_gemm/kernels/sme2_interleaved_nomerge_u8q_mopa_2VLx2VL.hpp b/src/core/NEON/kernels/arm_gemm/kernels/sme2_interleaved_nomerge_u8q_mopa_2VLx2VL.hpp index f8c375f9f5..8f5880bcea 100644 --- a/src/core/NEON/kernels/arm_gemm/kernels/sme2_interleaved_nomerge_u8q_mopa_2VLx2VL.hpp +++ b/src/core/NEON/kernels/arm_gemm/kernels/sme2_interleaved_nomerge_u8q_mopa_2VLx2VL.hpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2022-2023 Arm Limited. + * Copyright (c) 2022-2024 Arm Limited. * * SPDX-License-Identifier: MIT * diff --git a/src/core/NEON/kernels/arm_gemm/kernels/sme2_interleaved_nomerge_u8q_mopa_2VLx2VL/generic.cpp b/src/core/NEON/kernels/arm_gemm/kernels/sme2_interleaved_nomerge_u8q_mopa_2VLx2VL/generic.cpp index 9a59799529..8e8524a780 100644 --- a/src/core/NEON/kernels/arm_gemm/kernels/sme2_interleaved_nomerge_u8q_mopa_2VLx2VL/generic.cpp +++ b/src/core/NEON/kernels/arm_gemm/kernels/sme2_interleaved_nomerge_u8q_mopa_2VLx2VL/generic.cpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2022-2023 Arm Limited. + * Copyright (c) 2022-2024 Arm Limited. 
* * SPDX-License-Identifier: MIT * diff --git a/src/core/NEON/kernels/arm_gemm/kernels/sme2_interleaved_nomerge_u8q_mopa_4VLx1VL.hpp b/src/core/NEON/kernels/arm_gemm/kernels/sme2_interleaved_nomerge_u8q_mopa_4VLx1VL.hpp index 04d19324c5..0665468517 100644 --- a/src/core/NEON/kernels/arm_gemm/kernels/sme2_interleaved_nomerge_u8q_mopa_4VLx1VL.hpp +++ b/src/core/NEON/kernels/arm_gemm/kernels/sme2_interleaved_nomerge_u8q_mopa_4VLx1VL.hpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2022-2023 Arm Limited. + * Copyright (c) 2022-2024 Arm Limited. * * SPDX-License-Identifier: MIT * diff --git a/src/core/NEON/kernels/arm_gemm/kernels/sme2_interleaved_nomerge_u8q_mopa_4VLx1VL/generic.cpp b/src/core/NEON/kernels/arm_gemm/kernels/sme2_interleaved_nomerge_u8q_mopa_4VLx1VL/generic.cpp index 0f3346e65e..2239b3f1be 100644 --- a/src/core/NEON/kernels/arm_gemm/kernels/sme2_interleaved_nomerge_u8q_mopa_4VLx1VL/generic.cpp +++ b/src/core/NEON/kernels/arm_gemm/kernels/sme2_interleaved_nomerge_u8q_mopa_4VLx1VL/generic.cpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2022-2023 Arm Limited. + * Copyright (c) 2022-2024 Arm Limited. * * SPDX-License-Identifier: MIT * diff --git a/src/core/NEON/kernels/arm_gemm/kernels/sve_ffhybrid_bf16fp32_mmla_6x4VL.hpp b/src/core/NEON/kernels/arm_gemm/kernels/sve_ffhybrid_bf16fp32_mmla_6x4VL.hpp index 1ce169d562..4cf20bef91 100644 --- a/src/core/NEON/kernels/arm_gemm/kernels/sve_ffhybrid_bf16fp32_mmla_6x4VL.hpp +++ b/src/core/NEON/kernels/arm_gemm/kernels/sve_ffhybrid_bf16fp32_mmla_6x4VL.hpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2022-2023 Arm Limited. + * Copyright (c) 2022-2024 Arm Limited. 
* * SPDX-License-Identifier: MIT * diff --git a/src/core/NEON/kernels/arm_gemm/kernels/sve_ffhybrid_bf16fp32_mmla_6x4VL/generic.cpp b/src/core/NEON/kernels/arm_gemm/kernels/sve_ffhybrid_bf16fp32_mmla_6x4VL/generic.cpp index 9136e32567..7fc723ecad 100644 --- a/src/core/NEON/kernels/arm_gemm/kernels/sve_ffhybrid_bf16fp32_mmla_6x4VL/generic.cpp +++ b/src/core/NEON/kernels/arm_gemm/kernels/sve_ffhybrid_bf16fp32_mmla_6x4VL/generic.cpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2022-2023 Arm Limited. + * Copyright (c) 2022-2024 Arm Limited. * * SPDX-License-Identifier: MIT * diff --git a/src/core/NEON/kernels/arm_gemm/kernels/sve_ffhybrid_fp16_mla_6x4VL.hpp b/src/core/NEON/kernels/arm_gemm/kernels/sve_ffhybrid_fp16_mla_6x4VL.hpp index c42ad7e879..8e83f1cb2c 100644 --- a/src/core/NEON/kernels/arm_gemm/kernels/sve_ffhybrid_fp16_mla_6x4VL.hpp +++ b/src/core/NEON/kernels/arm_gemm/kernels/sve_ffhybrid_fp16_mla_6x4VL.hpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2022-2023 Arm Limited. + * Copyright (c) 2022-2024 Arm Limited. * * SPDX-License-Identifier: MIT * diff --git a/src/core/NEON/kernels/arm_gemm/kernels/sve_ffhybrid_fp16_mla_6x4VL/a64fx.cpp b/src/core/NEON/kernels/arm_gemm/kernels/sve_ffhybrid_fp16_mla_6x4VL/a64fx.cpp index 66601bd312..ffad168b44 100644 --- a/src/core/NEON/kernels/arm_gemm/kernels/sve_ffhybrid_fp16_mla_6x4VL/a64fx.cpp +++ b/src/core/NEON/kernels/arm_gemm/kernels/sve_ffhybrid_fp16_mla_6x4VL/a64fx.cpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2022-2023 Arm Limited. + * Copyright (c) 2022-2024 Arm Limited. * * SPDX-License-Identifier: MIT * diff --git a/src/core/NEON/kernels/arm_gemm/kernels/sve_ffhybrid_fp16_mla_6x4VL/generic.cpp b/src/core/NEON/kernels/arm_gemm/kernels/sve_ffhybrid_fp16_mla_6x4VL/generic.cpp index 842db1a4fc..d5ccf3476c 100644 --- a/src/core/NEON/kernels/arm_gemm/kernels/sve_ffhybrid_fp16_mla_6x4VL/generic.cpp +++ b/src/core/NEON/kernels/arm_gemm/kernels/sve_ffhybrid_fp16_mla_6x4VL/generic.cpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2022-2023 Arm Limited. 
+ * Copyright (c) 2022-2024 Arm Limited. * * SPDX-License-Identifier: MIT * diff --git a/src/core/NEON/kernels/arm_gemm/kernels/sve_ffhybrid_fp32_mla_6x4VL.hpp b/src/core/NEON/kernels/arm_gemm/kernels/sve_ffhybrid_fp32_mla_6x4VL.hpp index 3a93a2f7c8..ffa365b8a0 100644 --- a/src/core/NEON/kernels/arm_gemm/kernels/sve_ffhybrid_fp32_mla_6x4VL.hpp +++ b/src/core/NEON/kernels/arm_gemm/kernels/sve_ffhybrid_fp32_mla_6x4VL.hpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2022-2023 Arm Limited. + * Copyright (c) 2022-2024 Arm Limited. * * SPDX-License-Identifier: MIT * diff --git a/src/core/NEON/kernels/arm_gemm/kernels/sve_ffhybrid_fp32_mla_6x4VL/a64fx.cpp b/src/core/NEON/kernels/arm_gemm/kernels/sve_ffhybrid_fp32_mla_6x4VL/a64fx.cpp index 8e4fd4388e..53cd52fe56 100644 --- a/src/core/NEON/kernels/arm_gemm/kernels/sve_ffhybrid_fp32_mla_6x4VL/a64fx.cpp +++ b/src/core/NEON/kernels/arm_gemm/kernels/sve_ffhybrid_fp32_mla_6x4VL/a64fx.cpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2022-2023 Arm Limited. + * Copyright (c) 2022-2024 Arm Limited. * * SPDX-License-Identifier: MIT * diff --git a/src/core/NEON/kernels/arm_gemm/kernels/sve_ffhybrid_fp32_mla_6x4VL/generic.cpp b/src/core/NEON/kernels/arm_gemm/kernels/sve_ffhybrid_fp32_mla_6x4VL/generic.cpp index b1ab31e618..8f12e9ee62 100644 --- a/src/core/NEON/kernels/arm_gemm/kernels/sve_ffhybrid_fp32_mla_6x4VL/generic.cpp +++ b/src/core/NEON/kernels/arm_gemm/kernels/sve_ffhybrid_fp32_mla_6x4VL/generic.cpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2022-2023 Arm Limited. + * Copyright (c) 2022-2024 Arm Limited. 
* * SPDX-License-Identifier: MIT * diff --git a/src/core/NEON/kernels/arm_gemm/kernels/sve_ffhybrid_fp32bf16fp32_mmla_4x6VL/generic.cpp b/src/core/NEON/kernels/arm_gemm/kernels/sve_ffhybrid_fp32bf16fp32_mmla_4x6VL/generic.cpp index 57f42cce77..c2b6dd1030 100644 --- a/src/core/NEON/kernels/arm_gemm/kernels/sve_ffhybrid_fp32bf16fp32_mmla_4x6VL/generic.cpp +++ b/src/core/NEON/kernels/arm_gemm/kernels/sve_ffhybrid_fp32bf16fp32_mmla_4x6VL/generic.cpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2022-2023 Arm Limited. + * Copyright (c) 2022-2024 Arm Limited. * * SPDX-License-Identifier: MIT * diff --git a/src/core/NEON/kernels/arm_gemm/kernels/sve_ffinterleaved_bf16fp32_mmla_8x3VL/generic.cpp b/src/core/NEON/kernels/arm_gemm/kernels/sve_ffinterleaved_bf16fp32_mmla_8x3VL/generic.cpp index 576bd47039..09bc24051a 100644 --- a/src/core/NEON/kernels/arm_gemm/kernels/sve_ffinterleaved_bf16fp32_mmla_8x3VL/generic.cpp +++ b/src/core/NEON/kernels/arm_gemm/kernels/sve_ffinterleaved_bf16fp32_mmla_8x3VL/generic.cpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2022-2023 Arm Limited. + * Copyright (c) 2022-2024 Arm Limited. * * SPDX-License-Identifier: MIT * diff --git a/src/core/NEON/kernels/arm_gemm/kernels/sve_ffinterleaved_fp16_mla_8x3VL.hpp b/src/core/NEON/kernels/arm_gemm/kernels/sve_ffinterleaved_fp16_mla_8x3VL.hpp index 60f1b699c3..13ad2404e3 100644 --- a/src/core/NEON/kernels/arm_gemm/kernels/sve_ffinterleaved_fp16_mla_8x3VL.hpp +++ b/src/core/NEON/kernels/arm_gemm/kernels/sve_ffinterleaved_fp16_mla_8x3VL.hpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2022-2023 Arm Limited. + * Copyright (c) 2022-2024 Arm Limited. 
* * SPDX-License-Identifier: MIT * diff --git a/src/core/NEON/kernels/arm_gemm/kernels/sve_ffinterleaved_fp16_mla_8x3VL/a64fx.cpp b/src/core/NEON/kernels/arm_gemm/kernels/sve_ffinterleaved_fp16_mla_8x3VL/a64fx.cpp index 69ddb21c31..0389fb043a 100644 --- a/src/core/NEON/kernels/arm_gemm/kernels/sve_ffinterleaved_fp16_mla_8x3VL/a64fx.cpp +++ b/src/core/NEON/kernels/arm_gemm/kernels/sve_ffinterleaved_fp16_mla_8x3VL/a64fx.cpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2022-2023 Arm Limited. + * Copyright (c) 2022-2024 Arm Limited. * * SPDX-License-Identifier: MIT * diff --git a/src/core/NEON/kernels/arm_gemm/kernels/sve_ffinterleaved_fp16_mla_8x3VL/generic.cpp b/src/core/NEON/kernels/arm_gemm/kernels/sve_ffinterleaved_fp16_mla_8x3VL/generic.cpp index 23503fa108..bc23dc28b0 100644 --- a/src/core/NEON/kernels/arm_gemm/kernels/sve_ffinterleaved_fp16_mla_8x3VL/generic.cpp +++ b/src/core/NEON/kernels/arm_gemm/kernels/sve_ffinterleaved_fp16_mla_8x3VL/generic.cpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2022-2023 Arm Limited. + * Copyright (c) 2022-2024 Arm Limited. * * SPDX-License-Identifier: MIT * diff --git a/src/core/NEON/kernels/arm_gemm/kernels/sve_ffinterleaved_fp32_mla_8x3VL.hpp b/src/core/NEON/kernels/arm_gemm/kernels/sve_ffinterleaved_fp32_mla_8x3VL.hpp index ac6986913d..21e811497a 100644 --- a/src/core/NEON/kernels/arm_gemm/kernels/sve_ffinterleaved_fp32_mla_8x3VL.hpp +++ b/src/core/NEON/kernels/arm_gemm/kernels/sve_ffinterleaved_fp32_mla_8x3VL.hpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2022-2023 Arm Limited. + * Copyright (c) 2022-2024 Arm Limited. 
* * SPDX-License-Identifier: MIT * diff --git a/src/core/NEON/kernels/arm_gemm/kernels/sve_ffinterleaved_fp32_mla_8x3VL/a64fx.cpp b/src/core/NEON/kernels/arm_gemm/kernels/sve_ffinterleaved_fp32_mla_8x3VL/a64fx.cpp index c65c3a3ce4..d67c01a574 100644 --- a/src/core/NEON/kernels/arm_gemm/kernels/sve_ffinterleaved_fp32_mla_8x3VL/a64fx.cpp +++ b/src/core/NEON/kernels/arm_gemm/kernels/sve_ffinterleaved_fp32_mla_8x3VL/a64fx.cpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2022-2023 Arm Limited. + * Copyright (c) 2022-2024 Arm Limited. * * SPDX-License-Identifier: MIT * diff --git a/src/core/NEON/kernels/arm_gemm/kernels/sve_ffinterleaved_fp32_mla_8x3VL/generic.cpp b/src/core/NEON/kernels/arm_gemm/kernels/sve_ffinterleaved_fp32_mla_8x3VL/generic.cpp index 4b20be6f01..5f29a6ce3c 100644 --- a/src/core/NEON/kernels/arm_gemm/kernels/sve_ffinterleaved_fp32_mla_8x3VL/generic.cpp +++ b/src/core/NEON/kernels/arm_gemm/kernels/sve_ffinterleaved_fp32_mla_8x3VL/generic.cpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2022-2023 Arm Limited. + * Copyright (c) 2022-2024 Arm Limited. * * SPDX-License-Identifier: MIT * diff --git a/src/core/NEON/kernels/arm_gemm/kernels/sve_hybrid_bf16fp32_dot_6x4VL.hpp b/src/core/NEON/kernels/arm_gemm/kernels/sve_hybrid_bf16fp32_dot_6x4VL.hpp index 49ccce342e..4de4101148 100644 --- a/src/core/NEON/kernels/arm_gemm/kernels/sve_hybrid_bf16fp32_dot_6x4VL.hpp +++ b/src/core/NEON/kernels/arm_gemm/kernels/sve_hybrid_bf16fp32_dot_6x4VL.hpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2019-2021, 2023 Arm Limited. + * Copyright (c) 2019-2021, 2023-2024 Arm Limited. 
* * SPDX-License-Identifier: MIT * diff --git a/src/core/NEON/kernels/arm_gemm/kernels/sve_hybrid_bf16fp32_dot_6x4VL/generic.cpp b/src/core/NEON/kernels/arm_gemm/kernels/sve_hybrid_bf16fp32_dot_6x4VL/generic.cpp index 176f6e0d3a..688e7377b9 100644 --- a/src/core/NEON/kernels/arm_gemm/kernels/sve_hybrid_bf16fp32_dot_6x4VL/generic.cpp +++ b/src/core/NEON/kernels/arm_gemm/kernels/sve_hybrid_bf16fp32_dot_6x4VL/generic.cpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2019-2021, 2023 Arm Limited. + * Copyright (c) 2019-2021, 2023-2024 Arm Limited. * * SPDX-License-Identifier: MIT * diff --git a/src/core/NEON/kernels/arm_gemm/kernels/sve_hybrid_bf16fp32_mmla_6x4VL.hpp b/src/core/NEON/kernels/arm_gemm/kernels/sve_hybrid_bf16fp32_mmla_6x4VL.hpp index 223d8a78de..d7ca55c295 100644 --- a/src/core/NEON/kernels/arm_gemm/kernels/sve_hybrid_bf16fp32_mmla_6x4VL.hpp +++ b/src/core/NEON/kernels/arm_gemm/kernels/sve_hybrid_bf16fp32_mmla_6x4VL.hpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2021, 2023 Arm Limited. + * Copyright (c) 2021, 2023-2024 Arm Limited. * * SPDX-License-Identifier: MIT * diff --git a/src/core/NEON/kernels/arm_gemm/kernels/sve_hybrid_bf16fp32_mmla_6x4VL/generic.cpp b/src/core/NEON/kernels/arm_gemm/kernels/sve_hybrid_bf16fp32_mmla_6x4VL/generic.cpp index 74e2d267bc..ad997b0034 100644 --- a/src/core/NEON/kernels/arm_gemm/kernels/sve_hybrid_bf16fp32_mmla_6x4VL/generic.cpp +++ b/src/core/NEON/kernels/arm_gemm/kernels/sve_hybrid_bf16fp32_mmla_6x4VL/generic.cpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2021, 2023 Arm Limited. + * Copyright (c) 2021, 2023-2024 Arm Limited. 
* * SPDX-License-Identifier: MIT * diff --git a/src/core/NEON/kernels/arm_gemm/kernels/sve_hybrid_fp16_mla_6x4VL.hpp b/src/core/NEON/kernels/arm_gemm/kernels/sve_hybrid_fp16_mla_6x4VL.hpp index b930e4c0d5..86d6ecab54 100644 --- a/src/core/NEON/kernels/arm_gemm/kernels/sve_hybrid_fp16_mla_6x4VL.hpp +++ b/src/core/NEON/kernels/arm_gemm/kernels/sve_hybrid_fp16_mla_6x4VL.hpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2019-2021, 2023 Arm Limited. + * Copyright (c) 2019-2021, 2023-2024 Arm Limited. * * SPDX-License-Identifier: MIT * diff --git a/src/core/NEON/kernels/arm_gemm/kernels/sve_hybrid_fp16_mla_6x4VL/a64fx.cpp b/src/core/NEON/kernels/arm_gemm/kernels/sve_hybrid_fp16_mla_6x4VL/a64fx.cpp index d1a9bb4a26..8578733628 100644 --- a/src/core/NEON/kernels/arm_gemm/kernels/sve_hybrid_fp16_mla_6x4VL/a64fx.cpp +++ b/src/core/NEON/kernels/arm_gemm/kernels/sve_hybrid_fp16_mla_6x4VL/a64fx.cpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2021, 2023 Arm Limited. + * Copyright (c) 2021, 2023-2024 Arm Limited. * * SPDX-License-Identifier: MIT * diff --git a/src/core/NEON/kernels/arm_gemm/kernels/sve_hybrid_fp16_mla_6x4VL/generic.cpp b/src/core/NEON/kernels/arm_gemm/kernels/sve_hybrid_fp16_mla_6x4VL/generic.cpp index 041825df6b..64c4dfcd0e 100644 --- a/src/core/NEON/kernels/arm_gemm/kernels/sve_hybrid_fp16_mla_6x4VL/generic.cpp +++ b/src/core/NEON/kernels/arm_gemm/kernels/sve_hybrid_fp16_mla_6x4VL/generic.cpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2019-2021, 2023 Arm Limited. + * Copyright (c) 2019-2021, 2023-2024 Arm Limited. * * SPDX-License-Identifier: MIT * diff --git a/src/core/NEON/kernels/arm_gemm/kernels/sve_hybrid_fp32_mla_6x4VL.hpp b/src/core/NEON/kernels/arm_gemm/kernels/sve_hybrid_fp32_mla_6x4VL.hpp index 880f9d1a27..7936eeb11a 100644 --- a/src/core/NEON/kernels/arm_gemm/kernels/sve_hybrid_fp32_mla_6x4VL.hpp +++ b/src/core/NEON/kernels/arm_gemm/kernels/sve_hybrid_fp32_mla_6x4VL.hpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2019-2021, 2023 Arm Limited. 
+ * Copyright (c) 2019-2021, 2023-2024 Arm Limited. * * SPDX-License-Identifier: MIT * diff --git a/src/core/NEON/kernels/arm_gemm/kernels/sve_hybrid_fp32_mla_6x4VL/a64fx.cpp b/src/core/NEON/kernels/arm_gemm/kernels/sve_hybrid_fp32_mla_6x4VL/a64fx.cpp index 66481f04f9..430c081288 100644 --- a/src/core/NEON/kernels/arm_gemm/kernels/sve_hybrid_fp32_mla_6x4VL/a64fx.cpp +++ b/src/core/NEON/kernels/arm_gemm/kernels/sve_hybrid_fp32_mla_6x4VL/a64fx.cpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2021, 2023 Arm Limited. + * Copyright (c) 2021, 2023-2024 Arm Limited. * * SPDX-License-Identifier: MIT * diff --git a/src/core/NEON/kernels/arm_gemm/kernels/sve_hybrid_fp32_mla_6x4VL/generic.cpp b/src/core/NEON/kernels/arm_gemm/kernels/sve_hybrid_fp32_mla_6x4VL/generic.cpp index e1581f2026..da4670d7e0 100644 --- a/src/core/NEON/kernels/arm_gemm/kernels/sve_hybrid_fp32_mla_6x4VL/generic.cpp +++ b/src/core/NEON/kernels/arm_gemm/kernels/sve_hybrid_fp32_mla_6x4VL/generic.cpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2019-2021, 2023 Arm Limited. + * Copyright (c) 2019-2021, 2023-2024 Arm Limited. * * SPDX-License-Identifier: MIT * diff --git a/src/core/NEON/kernels/arm_gemm/kernels/sve_hybrid_fp32_mla_8x1VL.hpp b/src/core/NEON/kernels/arm_gemm/kernels/sve_hybrid_fp32_mla_8x1VL.hpp index a353c9d660..06a2d34767 100644 --- a/src/core/NEON/kernels/arm_gemm/kernels/sve_hybrid_fp32_mla_8x1VL.hpp +++ b/src/core/NEON/kernels/arm_gemm/kernels/sve_hybrid_fp32_mla_8x1VL.hpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2019-2021, 2023 Arm Limited. + * Copyright (c) 2019-2021, 2023-2024 Arm Limited. 
* * SPDX-License-Identifier: MIT * diff --git a/src/core/NEON/kernels/arm_gemm/kernels/sve_hybrid_fp32_mla_8x1VL/a64fx.cpp b/src/core/NEON/kernels/arm_gemm/kernels/sve_hybrid_fp32_mla_8x1VL/a64fx.cpp index 344341205b..17eaa5641d 100644 --- a/src/core/NEON/kernels/arm_gemm/kernels/sve_hybrid_fp32_mla_8x1VL/a64fx.cpp +++ b/src/core/NEON/kernels/arm_gemm/kernels/sve_hybrid_fp32_mla_8x1VL/a64fx.cpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2021, 2023 Arm Limited. + * Copyright (c) 2021, 2023-2024 Arm Limited. * * SPDX-License-Identifier: MIT * diff --git a/src/core/NEON/kernels/arm_gemm/kernels/sve_hybrid_fp32_mla_8x1VL/generic.cpp b/src/core/NEON/kernels/arm_gemm/kernels/sve_hybrid_fp32_mla_8x1VL/generic.cpp index 161c85e5f3..b79934094e 100644 --- a/src/core/NEON/kernels/arm_gemm/kernels/sve_hybrid_fp32_mla_8x1VL/generic.cpp +++ b/src/core/NEON/kernels/arm_gemm/kernels/sve_hybrid_fp32_mla_8x1VL/generic.cpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2019-2021, 2023 Arm Limited. + * Copyright (c) 2019-2021, 2023-2024 Arm Limited. * * SPDX-License-Identifier: MIT * diff --git a/src/core/NEON/kernels/arm_gemm/kernels/sve_hybrid_fp32bf16fp32_mmla_4x6VL.hpp b/src/core/NEON/kernels/arm_gemm/kernels/sve_hybrid_fp32bf16fp32_mmla_4x6VL.hpp index 66c106d2eb..7dc786fd66 100644 --- a/src/core/NEON/kernels/arm_gemm/kernels/sve_hybrid_fp32bf16fp32_mmla_4x6VL.hpp +++ b/src/core/NEON/kernels/arm_gemm/kernels/sve_hybrid_fp32bf16fp32_mmla_4x6VL.hpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2021, 2023 Arm Limited. + * Copyright (c) 2021, 2023-2024 Arm Limited. 
* * SPDX-License-Identifier: MIT * diff --git a/src/core/NEON/kernels/arm_gemm/kernels/sve_hybrid_fp32bf16fp32_mmla_4x6VL/generic.cpp b/src/core/NEON/kernels/arm_gemm/kernels/sve_hybrid_fp32bf16fp32_mmla_4x6VL/generic.cpp index 2b2a0684f9..b0d6f756d7 100644 --- a/src/core/NEON/kernels/arm_gemm/kernels/sve_hybrid_fp32bf16fp32_mmla_4x6VL/generic.cpp +++ b/src/core/NEON/kernels/arm_gemm/kernels/sve_hybrid_fp32bf16fp32_mmla_4x6VL/generic.cpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2021, 2023 Arm Limited. + * Copyright (c) 2021, 2023-2024 Arm Limited. * * SPDX-License-Identifier: MIT * diff --git a/src/core/NEON/kernels/arm_gemm/kernels/sve_hybrid_fp32bf16fp32_mmla_6x4VL.hpp b/src/core/NEON/kernels/arm_gemm/kernels/sve_hybrid_fp32bf16fp32_mmla_6x4VL.hpp index 15b7dd721c..230c0b77d7 100644 --- a/src/core/NEON/kernels/arm_gemm/kernels/sve_hybrid_fp32bf16fp32_mmla_6x4VL.hpp +++ b/src/core/NEON/kernels/arm_gemm/kernels/sve_hybrid_fp32bf16fp32_mmla_6x4VL.hpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2021, 2023 Arm Limited. + * Copyright (c) 2021, 2023-2024 Arm Limited. * * SPDX-License-Identifier: MIT * diff --git a/src/core/NEON/kernels/arm_gemm/kernels/sve_hybrid_fp32bf16fp32_mmla_6x4VL/generic.cpp b/src/core/NEON/kernels/arm_gemm/kernels/sve_hybrid_fp32bf16fp32_mmla_6x4VL/generic.cpp index 0d2b47ec39..d31e68993e 100644 --- a/src/core/NEON/kernels/arm_gemm/kernels/sve_hybrid_fp32bf16fp32_mmla_6x4VL/generic.cpp +++ b/src/core/NEON/kernels/arm_gemm/kernels/sve_hybrid_fp32bf16fp32_mmla_6x4VL/generic.cpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2021, 2023 Arm Limited. + * Copyright (c) 2021, 2023-2024 Arm Limited. 
* * SPDX-License-Identifier: MIT * diff --git a/src/core/NEON/kernels/arm_gemm/kernels/sve_hybrid_s8qa_dot_4x4VL.hpp b/src/core/NEON/kernels/arm_gemm/kernels/sve_hybrid_s8qa_dot_4x4VL.hpp index ffc1606b3f..dc008866a1 100644 --- a/src/core/NEON/kernels/arm_gemm/kernels/sve_hybrid_s8qa_dot_4x4VL.hpp +++ b/src/core/NEON/kernels/arm_gemm/kernels/sve_hybrid_s8qa_dot_4x4VL.hpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2019-2021, 2023 Arm Limited. + * Copyright (c) 2019-2021, 2023-2024 Arm Limited. * * SPDX-License-Identifier: MIT * diff --git a/src/core/NEON/kernels/arm_gemm/kernels/sve_hybrid_s8qa_dot_4x4VL/generic.cpp b/src/core/NEON/kernels/arm_gemm/kernels/sve_hybrid_s8qa_dot_4x4VL/generic.cpp index b7c523466e..b06e0bd3c3 100644 --- a/src/core/NEON/kernels/arm_gemm/kernels/sve_hybrid_s8qa_dot_4x4VL/generic.cpp +++ b/src/core/NEON/kernels/arm_gemm/kernels/sve_hybrid_s8qa_dot_4x4VL/generic.cpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2021, 2023 Arm Limited. + * Copyright (c) 2021, 2023-2024 Arm Limited. * * SPDX-License-Identifier: MIT * diff --git a/src/core/NEON/kernels/arm_gemm/kernels/sve_hybrid_s8qa_mmla_4x4VL.hpp b/src/core/NEON/kernels/arm_gemm/kernels/sve_hybrid_s8qa_mmla_4x4VL.hpp index ae922e9743..4a57f89880 100644 --- a/src/core/NEON/kernels/arm_gemm/kernels/sve_hybrid_s8qa_mmla_4x4VL.hpp +++ b/src/core/NEON/kernels/arm_gemm/kernels/sve_hybrid_s8qa_mmla_4x4VL.hpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2021, 2023 Arm Limited. + * Copyright (c) 2021, 2023-2024 Arm Limited. * * SPDX-License-Identifier: MIT * diff --git a/src/core/NEON/kernels/arm_gemm/kernels/sve_hybrid_s8qa_mmla_4x4VL/generic.cpp b/src/core/NEON/kernels/arm_gemm/kernels/sve_hybrid_s8qa_mmla_4x4VL/generic.cpp index e0628364f4..0d5ea54cb7 100644 --- a/src/core/NEON/kernels/arm_gemm/kernels/sve_hybrid_s8qa_mmla_4x4VL/generic.cpp +++ b/src/core/NEON/kernels/arm_gemm/kernels/sve_hybrid_s8qa_mmla_4x4VL/generic.cpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2021, 2023 Arm Limited. 
+ * Copyright (c) 2021, 2023-2024 Arm Limited. * * SPDX-License-Identifier: MIT * diff --git a/src/core/NEON/kernels/arm_gemm/kernels/sve_hybrid_s8qs_dot_6x4VL.hpp b/src/core/NEON/kernels/arm_gemm/kernels/sve_hybrid_s8qs_dot_6x4VL.hpp index 056ae7a616..6116e0cefb 100644 --- a/src/core/NEON/kernels/arm_gemm/kernels/sve_hybrid_s8qs_dot_6x4VL.hpp +++ b/src/core/NEON/kernels/arm_gemm/kernels/sve_hybrid_s8qs_dot_6x4VL.hpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2019-2021, 2023 Arm Limited. + * Copyright (c) 2019-2021, 2023-2024 Arm Limited. * * SPDX-License-Identifier: MIT * diff --git a/src/core/NEON/kernels/arm_gemm/kernels/sve_hybrid_s8qs_dot_6x4VL/generic.cpp b/src/core/NEON/kernels/arm_gemm/kernels/sve_hybrid_s8qs_dot_6x4VL/generic.cpp index c28717a37e..b3fb963111 100644 --- a/src/core/NEON/kernels/arm_gemm/kernels/sve_hybrid_s8qs_dot_6x4VL/generic.cpp +++ b/src/core/NEON/kernels/arm_gemm/kernels/sve_hybrid_s8qs_dot_6x4VL/generic.cpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2021, 2023 Arm Limited. + * Copyright (c) 2021, 2023-2024 Arm Limited. * * SPDX-License-Identifier: MIT * diff --git a/src/core/NEON/kernels/arm_gemm/kernels/sve_hybrid_s8qs_mmla_6x4VL.hpp b/src/core/NEON/kernels/arm_gemm/kernels/sve_hybrid_s8qs_mmla_6x4VL.hpp index b1b1135c73..11ff5b2f15 100644 --- a/src/core/NEON/kernels/arm_gemm/kernels/sve_hybrid_s8qs_mmla_6x4VL.hpp +++ b/src/core/NEON/kernels/arm_gemm/kernels/sve_hybrid_s8qs_mmla_6x4VL.hpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2021, 2023 Arm Limited. + * Copyright (c) 2021, 2023-2024 Arm Limited. 
* * SPDX-License-Identifier: MIT * diff --git a/src/core/NEON/kernels/arm_gemm/kernels/sve_hybrid_s8qs_mmla_6x4VL/generic.cpp b/src/core/NEON/kernels/arm_gemm/kernels/sve_hybrid_s8qs_mmla_6x4VL/generic.cpp index cd5f85411c..1a1201310e 100644 --- a/src/core/NEON/kernels/arm_gemm/kernels/sve_hybrid_s8qs_mmla_6x4VL/generic.cpp +++ b/src/core/NEON/kernels/arm_gemm/kernels/sve_hybrid_s8qs_mmla_6x4VL/generic.cpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2021, 2023 Arm Limited. + * Copyright (c) 2021, 2023-2024 Arm Limited. * * SPDX-License-Identifier: MIT * diff --git a/src/core/NEON/kernels/arm_gemm/kernels/sve_hybrid_s8s32_dot_6x4VL.hpp b/src/core/NEON/kernels/arm_gemm/kernels/sve_hybrid_s8s32_dot_6x4VL.hpp index cfa349f3aa..a6abb8d354 100644 --- a/src/core/NEON/kernels/arm_gemm/kernels/sve_hybrid_s8s32_dot_6x4VL.hpp +++ b/src/core/NEON/kernels/arm_gemm/kernels/sve_hybrid_s8s32_dot_6x4VL.hpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2019-2021, 2023 Arm Limited. + * Copyright (c) 2019-2021, 2023-2024 Arm Limited. * * SPDX-License-Identifier: MIT * diff --git a/src/core/NEON/kernels/arm_gemm/kernels/sve_hybrid_s8s32_dot_6x4VL/a64fx.cpp b/src/core/NEON/kernels/arm_gemm/kernels/sve_hybrid_s8s32_dot_6x4VL/a64fx.cpp index 1a483210f3..92a350c8a8 100644 --- a/src/core/NEON/kernels/arm_gemm/kernels/sve_hybrid_s8s32_dot_6x4VL/a64fx.cpp +++ b/src/core/NEON/kernels/arm_gemm/kernels/sve_hybrid_s8s32_dot_6x4VL/a64fx.cpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2021, 2023 Arm Limited. + * Copyright (c) 2021, 2023-2024 Arm Limited. * * SPDX-License-Identifier: MIT * diff --git a/src/core/NEON/kernels/arm_gemm/kernels/sve_hybrid_s8s32_dot_6x4VL/generic.cpp b/src/core/NEON/kernels/arm_gemm/kernels/sve_hybrid_s8s32_dot_6x4VL/generic.cpp index eeef192b66..f9b84e26fc 100644 --- a/src/core/NEON/kernels/arm_gemm/kernels/sve_hybrid_s8s32_dot_6x4VL/generic.cpp +++ b/src/core/NEON/kernels/arm_gemm/kernels/sve_hybrid_s8s32_dot_6x4VL/generic.cpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2019-2021, 2023 Arm Limited. 
+ * Copyright (c) 2019-2021, 2023-2024 Arm Limited. * * SPDX-License-Identifier: MIT * diff --git a/src/core/NEON/kernels/arm_gemm/kernels/sve_hybrid_s8s32_mmla_6x4VL.hpp b/src/core/NEON/kernels/arm_gemm/kernels/sve_hybrid_s8s32_mmla_6x4VL.hpp index 686295496e..8135172b54 100644 --- a/src/core/NEON/kernels/arm_gemm/kernels/sve_hybrid_s8s32_mmla_6x4VL.hpp +++ b/src/core/NEON/kernels/arm_gemm/kernels/sve_hybrid_s8s32_mmla_6x4VL.hpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2021, 2023 Arm Limited. + * Copyright (c) 2021, 2023-2024 Arm Limited. * * SPDX-License-Identifier: MIT * diff --git a/src/core/NEON/kernels/arm_gemm/kernels/sve_hybrid_s8s32_mmla_6x4VL/generic.cpp b/src/core/NEON/kernels/arm_gemm/kernels/sve_hybrid_s8s32_mmla_6x4VL/generic.cpp index f66b6345ea..abe4b92faf 100644 --- a/src/core/NEON/kernels/arm_gemm/kernels/sve_hybrid_s8s32_mmla_6x4VL/generic.cpp +++ b/src/core/NEON/kernels/arm_gemm/kernels/sve_hybrid_s8s32_mmla_6x4VL/generic.cpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2021, 2023 Arm Limited. + * Copyright (c) 2021, 2023-2024 Arm Limited. * * SPDX-License-Identifier: MIT * diff --git a/src/core/NEON/kernels/arm_gemm/kernels/sve_hybrid_u8qa_dot_4x4VL.hpp b/src/core/NEON/kernels/arm_gemm/kernels/sve_hybrid_u8qa_dot_4x4VL.hpp index 11fe5ce7e3..839ff6f0af 100644 --- a/src/core/NEON/kernels/arm_gemm/kernels/sve_hybrid_u8qa_dot_4x4VL.hpp +++ b/src/core/NEON/kernels/arm_gemm/kernels/sve_hybrid_u8qa_dot_4x4VL.hpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2019-2021, 2023 Arm Limited. + * Copyright (c) 2019-2021, 2023-2024 Arm Limited. 
* * SPDX-License-Identifier: MIT * diff --git a/src/core/NEON/kernels/arm_gemm/kernels/sve_hybrid_u8qa_dot_4x4VL/generic.cpp b/src/core/NEON/kernels/arm_gemm/kernels/sve_hybrid_u8qa_dot_4x4VL/generic.cpp index e74b424888..85e933fd46 100644 --- a/src/core/NEON/kernels/arm_gemm/kernels/sve_hybrid_u8qa_dot_4x4VL/generic.cpp +++ b/src/core/NEON/kernels/arm_gemm/kernels/sve_hybrid_u8qa_dot_4x4VL/generic.cpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2019-2021, 2023 Arm Limited. + * Copyright (c) 2019-2021, 2023-2024 Arm Limited. * * SPDX-License-Identifier: MIT * diff --git a/src/core/NEON/kernels/arm_gemm/kernels/sve_hybrid_u8qa_mmla_4x4VL.hpp b/src/core/NEON/kernels/arm_gemm/kernels/sve_hybrid_u8qa_mmla_4x4VL.hpp index 5de68cc738..e37ade4a00 100644 --- a/src/core/NEON/kernels/arm_gemm/kernels/sve_hybrid_u8qa_mmla_4x4VL.hpp +++ b/src/core/NEON/kernels/arm_gemm/kernels/sve_hybrid_u8qa_mmla_4x4VL.hpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2021, 2023 Arm Limited. + * Copyright (c) 2021, 2023-2024 Arm Limited. * * SPDX-License-Identifier: MIT * diff --git a/src/core/NEON/kernels/arm_gemm/kernels/sve_hybrid_u8qa_mmla_4x4VL/generic.cpp b/src/core/NEON/kernels/arm_gemm/kernels/sve_hybrid_u8qa_mmla_4x4VL/generic.cpp index 69894bec41..7b67ccd545 100644 --- a/src/core/NEON/kernels/arm_gemm/kernels/sve_hybrid_u8qa_mmla_4x4VL/generic.cpp +++ b/src/core/NEON/kernels/arm_gemm/kernels/sve_hybrid_u8qa_mmla_4x4VL/generic.cpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2021, 2023 Arm Limited. + * Copyright (c) 2021, 2023-2024 Arm Limited. * * SPDX-License-Identifier: MIT * diff --git a/src/core/NEON/kernels/arm_gemm/kernels/sve_hybrid_u8u32_dot_6x4VL.hpp b/src/core/NEON/kernels/arm_gemm/kernels/sve_hybrid_u8u32_dot_6x4VL.hpp index e9197e8ec5..920fca738c 100644 --- a/src/core/NEON/kernels/arm_gemm/kernels/sve_hybrid_u8u32_dot_6x4VL.hpp +++ b/src/core/NEON/kernels/arm_gemm/kernels/sve_hybrid_u8u32_dot_6x4VL.hpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2021, 2023 Arm Limited. 
+ * Copyright (c) 2021, 2023-2024 Arm Limited. * * SPDX-License-Identifier: MIT * diff --git a/src/core/NEON/kernels/arm_gemm/kernels/sve_hybrid_u8u32_dot_6x4VL/a64fx.cpp b/src/core/NEON/kernels/arm_gemm/kernels/sve_hybrid_u8u32_dot_6x4VL/a64fx.cpp index 4d0f44982a..6a0aeb79b4 100644 --- a/src/core/NEON/kernels/arm_gemm/kernels/sve_hybrid_u8u32_dot_6x4VL/a64fx.cpp +++ b/src/core/NEON/kernels/arm_gemm/kernels/sve_hybrid_u8u32_dot_6x4VL/a64fx.cpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2021, 2023 Arm Limited. + * Copyright (c) 2021, 2023-2024 Arm Limited. * * SPDX-License-Identifier: MIT * diff --git a/src/core/NEON/kernels/arm_gemm/kernels/sve_hybrid_u8u32_dot_6x4VL/generic.cpp b/src/core/NEON/kernels/arm_gemm/kernels/sve_hybrid_u8u32_dot_6x4VL/generic.cpp index 7871c0b003..03b41dabe4 100644 --- a/src/core/NEON/kernels/arm_gemm/kernels/sve_hybrid_u8u32_dot_6x4VL/generic.cpp +++ b/src/core/NEON/kernels/arm_gemm/kernels/sve_hybrid_u8u32_dot_6x4VL/generic.cpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2021, 2023 Arm Limited. + * Copyright (c) 2021, 2023-2024 Arm Limited. * * SPDX-License-Identifier: MIT * diff --git a/src/core/NEON/kernels/arm_gemm/kernels/sve_hybrid_u8u32_mmla_6x4VL.hpp b/src/core/NEON/kernels/arm_gemm/kernels/sve_hybrid_u8u32_mmla_6x4VL.hpp index 8c6a3dba7d..2c4eaaab4a 100644 --- a/src/core/NEON/kernels/arm_gemm/kernels/sve_hybrid_u8u32_mmla_6x4VL.hpp +++ b/src/core/NEON/kernels/arm_gemm/kernels/sve_hybrid_u8u32_mmla_6x4VL.hpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2021, 2023 Arm Limited. + * Copyright (c) 2021, 2023-2024 Arm Limited. 
* * SPDX-License-Identifier: MIT * diff --git a/src/core/NEON/kernels/arm_gemm/kernels/sve_hybrid_u8u32_mmla_6x4VL/generic.cpp b/src/core/NEON/kernels/arm_gemm/kernels/sve_hybrid_u8u32_mmla_6x4VL/generic.cpp index 9269576d90..32cb778de4 100644 --- a/src/core/NEON/kernels/arm_gemm/kernels/sve_hybrid_u8u32_mmla_6x4VL/generic.cpp +++ b/src/core/NEON/kernels/arm_gemm/kernels/sve_hybrid_u8u32_mmla_6x4VL/generic.cpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2021, 2023 Arm Limited. + * Copyright (c) 2021, 2023-2024 Arm Limited. * * SPDX-License-Identifier: MIT * diff --git a/src/core/NEON/kernels/arm_gemm/kernels/sve_interleaved_bf16fp32_dot_8x3VL.hpp b/src/core/NEON/kernels/arm_gemm/kernels/sve_interleaved_bf16fp32_dot_8x3VL.hpp index 1ae035c614..ce3b070052 100644 --- a/src/core/NEON/kernels/arm_gemm/kernels/sve_interleaved_bf16fp32_dot_8x3VL.hpp +++ b/src/core/NEON/kernels/arm_gemm/kernels/sve_interleaved_bf16fp32_dot_8x3VL.hpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2019-2021, 2023 Arm Limited. + * Copyright (c) 2019-2021, 2023-2024 Arm Limited. * * SPDX-License-Identifier: MIT * diff --git a/src/core/NEON/kernels/arm_gemm/kernels/sve_interleaved_bf16fp32_dot_8x3VL/generic.cpp b/src/core/NEON/kernels/arm_gemm/kernels/sve_interleaved_bf16fp32_dot_8x3VL/generic.cpp index e507bc5551..a6eefc1006 100644 --- a/src/core/NEON/kernels/arm_gemm/kernels/sve_interleaved_bf16fp32_dot_8x3VL/generic.cpp +++ b/src/core/NEON/kernels/arm_gemm/kernels/sve_interleaved_bf16fp32_dot_8x3VL/generic.cpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2019-2021, 2023 Arm Limited. + * Copyright (c) 2019-2021, 2023-2024 Arm Limited. 
* * SPDX-License-Identifier: MIT * diff --git a/src/core/NEON/kernels/arm_gemm/kernels/sve_interleaved_bf16fp32_mmla_8x3VL.hpp b/src/core/NEON/kernels/arm_gemm/kernels/sve_interleaved_bf16fp32_mmla_8x3VL.hpp index c5096ff4ba..42a7dbb37a 100644 --- a/src/core/NEON/kernels/arm_gemm/kernels/sve_interleaved_bf16fp32_mmla_8x3VL.hpp +++ b/src/core/NEON/kernels/arm_gemm/kernels/sve_interleaved_bf16fp32_mmla_8x3VL.hpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2019-2023 Arm Limited. + * Copyright (c) 2019-2024 Arm Limited. * * SPDX-License-Identifier: MIT * diff --git a/src/core/NEON/kernels/arm_gemm/kernels/sve_interleaved_bf16fp32_mmla_8x3VL/generic.cpp b/src/core/NEON/kernels/arm_gemm/kernels/sve_interleaved_bf16fp32_mmla_8x3VL/generic.cpp index ba7185752a..3ffec98c16 100644 --- a/src/core/NEON/kernels/arm_gemm/kernels/sve_interleaved_bf16fp32_mmla_8x3VL/generic.cpp +++ b/src/core/NEON/kernels/arm_gemm/kernels/sve_interleaved_bf16fp32_mmla_8x3VL/generic.cpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2019-2021, 2023 Arm Limited. + * Copyright (c) 2019-2021, 2023-2024 Arm Limited. * * SPDX-License-Identifier: MIT * diff --git a/src/core/NEON/kernels/arm_gemm/kernels/sve_interleaved_fp16_mla_8x3VL.hpp b/src/core/NEON/kernels/arm_gemm/kernels/sve_interleaved_fp16_mla_8x3VL.hpp index 6c54167763..e04e3d2a1a 100644 --- a/src/core/NEON/kernels/arm_gemm/kernels/sve_interleaved_fp16_mla_8x3VL.hpp +++ b/src/core/NEON/kernels/arm_gemm/kernels/sve_interleaved_fp16_mla_8x3VL.hpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2019-2021, 2023 Arm Limited. + * Copyright (c) 2019-2021, 2023-2024 Arm Limited. 
* * SPDX-License-Identifier: MIT * diff --git a/src/core/NEON/kernels/arm_gemm/kernels/sve_interleaved_fp16_mla_8x3VL/a64fx.cpp b/src/core/NEON/kernels/arm_gemm/kernels/sve_interleaved_fp16_mla_8x3VL/a64fx.cpp index 609277d889..6e19811d72 100644 --- a/src/core/NEON/kernels/arm_gemm/kernels/sve_interleaved_fp16_mla_8x3VL/a64fx.cpp +++ b/src/core/NEON/kernels/arm_gemm/kernels/sve_interleaved_fp16_mla_8x3VL/a64fx.cpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2021, 2023 Arm Limited. + * Copyright (c) 2021, 2023-2024 Arm Limited. * * SPDX-License-Identifier: MIT * diff --git a/src/core/NEON/kernels/arm_gemm/kernels/sve_interleaved_fp16_mla_8x3VL/generic.cpp b/src/core/NEON/kernels/arm_gemm/kernels/sve_interleaved_fp16_mla_8x3VL/generic.cpp index 3b16c97e2c..865d011ac1 100644 --- a/src/core/NEON/kernels/arm_gemm/kernels/sve_interleaved_fp16_mla_8x3VL/generic.cpp +++ b/src/core/NEON/kernels/arm_gemm/kernels/sve_interleaved_fp16_mla_8x3VL/generic.cpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2019-2021, 2023 Arm Limited. + * Copyright (c) 2019-2021, 2023-2024 Arm Limited. * * SPDX-License-Identifier: MIT * diff --git a/src/core/NEON/kernels/arm_gemm/kernels/sve_interleaved_fp32_mla_8x3VL.hpp b/src/core/NEON/kernels/arm_gemm/kernels/sve_interleaved_fp32_mla_8x3VL.hpp index 23ab7ce10a..000866346f 100644 --- a/src/core/NEON/kernels/arm_gemm/kernels/sve_interleaved_fp32_mla_8x3VL.hpp +++ b/src/core/NEON/kernels/arm_gemm/kernels/sve_interleaved_fp32_mla_8x3VL.hpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2019-2021, 2023 Arm Limited. + * Copyright (c) 2019-2021, 2023-2024 Arm Limited. 
* * SPDX-License-Identifier: MIT * diff --git a/src/core/NEON/kernels/arm_gemm/kernels/sve_interleaved_fp32_mla_8x3VL/a64fx.cpp b/src/core/NEON/kernels/arm_gemm/kernels/sve_interleaved_fp32_mla_8x3VL/a64fx.cpp index 0b13913717..ee9f58f811 100644 --- a/src/core/NEON/kernels/arm_gemm/kernels/sve_interleaved_fp32_mla_8x3VL/a64fx.cpp +++ b/src/core/NEON/kernels/arm_gemm/kernels/sve_interleaved_fp32_mla_8x3VL/a64fx.cpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2021, 2023 Arm Limited. + * Copyright (c) 2021, 2023-2024 Arm Limited. * * SPDX-License-Identifier: MIT * diff --git a/src/core/NEON/kernels/arm_gemm/kernels/sve_interleaved_fp32_mla_8x3VL/generic.cpp b/src/core/NEON/kernels/arm_gemm/kernels/sve_interleaved_fp32_mla_8x3VL/generic.cpp index c7f32ff7a9..f459fe92c9 100644 --- a/src/core/NEON/kernels/arm_gemm/kernels/sve_interleaved_fp32_mla_8x3VL/generic.cpp +++ b/src/core/NEON/kernels/arm_gemm/kernels/sve_interleaved_fp32_mla_8x3VL/generic.cpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2019-2021, 2023 Arm Limited. + * Copyright (c) 2019-2021, 2023-2024 Arm Limited. * * SPDX-License-Identifier: MIT * diff --git a/src/core/NEON/kernels/arm_gemm/kernels/sve_interleaved_fp32_mmla_8x3VL.hpp b/src/core/NEON/kernels/arm_gemm/kernels/sve_interleaved_fp32_mmla_8x3VL.hpp index a355262fe2..71e8551b92 100644 --- a/src/core/NEON/kernels/arm_gemm/kernels/sve_interleaved_fp32_mmla_8x3VL.hpp +++ b/src/core/NEON/kernels/arm_gemm/kernels/sve_interleaved_fp32_mmla_8x3VL.hpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2019-2020 Arm Limited. + * Copyright (c) 2019-2020, 2024 Arm Limited. 
* * SPDX-License-Identifier: MIT * diff --git a/src/core/NEON/kernels/arm_gemm/kernels/sve_interleaved_s8s32_dot_8x3VL.hpp b/src/core/NEON/kernels/arm_gemm/kernels/sve_interleaved_s8s32_dot_8x3VL.hpp index cf3069f828..ed1faeccd9 100644 --- a/src/core/NEON/kernels/arm_gemm/kernels/sve_interleaved_s8s32_dot_8x3VL.hpp +++ b/src/core/NEON/kernels/arm_gemm/kernels/sve_interleaved_s8s32_dot_8x3VL.hpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2019-2021, 2023 Arm Limited. + * Copyright (c) 2019-2021, 2023-2024 Arm Limited. * * SPDX-License-Identifier: MIT * diff --git a/src/core/NEON/kernels/arm_gemm/kernels/sve_interleaved_s8s32_dot_8x3VL/a64fx.cpp b/src/core/NEON/kernels/arm_gemm/kernels/sve_interleaved_s8s32_dot_8x3VL/a64fx.cpp index c668a7b746..0159fe4923 100644 --- a/src/core/NEON/kernels/arm_gemm/kernels/sve_interleaved_s8s32_dot_8x3VL/a64fx.cpp +++ b/src/core/NEON/kernels/arm_gemm/kernels/sve_interleaved_s8s32_dot_8x3VL/a64fx.cpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2021, 2023 Arm Limited. + * Copyright (c) 2021, 2023-2024 Arm Limited. * * SPDX-License-Identifier: MIT * diff --git a/src/core/NEON/kernels/arm_gemm/kernels/sve_interleaved_s8s32_dot_8x3VL/generic.cpp b/src/core/NEON/kernels/arm_gemm/kernels/sve_interleaved_s8s32_dot_8x3VL/generic.cpp index f6e1a75c15..88a086fac7 100644 --- a/src/core/NEON/kernels/arm_gemm/kernels/sve_interleaved_s8s32_dot_8x3VL/generic.cpp +++ b/src/core/NEON/kernels/arm_gemm/kernels/sve_interleaved_s8s32_dot_8x3VL/generic.cpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2019-2021, 2023 Arm Limited. + * Copyright (c) 2019-2021, 2023-2024 Arm Limited. 
* * SPDX-License-Identifier: MIT * diff --git a/src/core/NEON/kernels/arm_gemm/kernels/sve_interleaved_s8s32_mmla_8x3VL.hpp b/src/core/NEON/kernels/arm_gemm/kernels/sve_interleaved_s8s32_mmla_8x3VL.hpp index 82734abfbe..d86943c9b2 100644 --- a/src/core/NEON/kernels/arm_gemm/kernels/sve_interleaved_s8s32_mmla_8x3VL.hpp +++ b/src/core/NEON/kernels/arm_gemm/kernels/sve_interleaved_s8s32_mmla_8x3VL.hpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2019-2021, 2023 Arm Limited. + * Copyright (c) 2019-2021, 2023-2024 Arm Limited. * * SPDX-License-Identifier: MIT * diff --git a/src/core/NEON/kernels/arm_gemm/kernels/sve_interleaved_s8s32_mmla_8x3VL/generic.cpp b/src/core/NEON/kernels/arm_gemm/kernels/sve_interleaved_s8s32_mmla_8x3VL/generic.cpp index bfed5000fc..afc8038956 100644 --- a/src/core/NEON/kernels/arm_gemm/kernels/sve_interleaved_s8s32_mmla_8x3VL/generic.cpp +++ b/src/core/NEON/kernels/arm_gemm/kernels/sve_interleaved_s8s32_mmla_8x3VL/generic.cpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2019-2021, 2023 Arm Limited. + * Copyright (c) 2019-2021, 2023-2024 Arm Limited. * * SPDX-License-Identifier: MIT * diff --git a/src/core/NEON/kernels/arm_gemm/kernels/sve_interleaved_u8u32_dot_8x3VL.hpp b/src/core/NEON/kernels/arm_gemm/kernels/sve_interleaved_u8u32_dot_8x3VL.hpp index c0b215ccb4..171c810c8f 100644 --- a/src/core/NEON/kernels/arm_gemm/kernels/sve_interleaved_u8u32_dot_8x3VL.hpp +++ b/src/core/NEON/kernels/arm_gemm/kernels/sve_interleaved_u8u32_dot_8x3VL.hpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2019-2021, 2023 Arm Limited. + * Copyright (c) 2019-2021, 2023-2024 Arm Limited. 
* * SPDX-License-Identifier: MIT * diff --git a/src/core/NEON/kernels/arm_gemm/kernels/sve_interleaved_u8u32_dot_8x3VL/a64fx.cpp b/src/core/NEON/kernels/arm_gemm/kernels/sve_interleaved_u8u32_dot_8x3VL/a64fx.cpp index 79e794a834..e2151ef41a 100644 --- a/src/core/NEON/kernels/arm_gemm/kernels/sve_interleaved_u8u32_dot_8x3VL/a64fx.cpp +++ b/src/core/NEON/kernels/arm_gemm/kernels/sve_interleaved_u8u32_dot_8x3VL/a64fx.cpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2021, 2023 Arm Limited. + * Copyright (c) 2021, 2023-2024 Arm Limited. * * SPDX-License-Identifier: MIT * diff --git a/src/core/NEON/kernels/arm_gemm/kernels/sve_interleaved_u8u32_dot_8x3VL/generic.cpp b/src/core/NEON/kernels/arm_gemm/kernels/sve_interleaved_u8u32_dot_8x3VL/generic.cpp index 1c88336c2d..eaa3ad2428 100644 --- a/src/core/NEON/kernels/arm_gemm/kernels/sve_interleaved_u8u32_dot_8x3VL/generic.cpp +++ b/src/core/NEON/kernels/arm_gemm/kernels/sve_interleaved_u8u32_dot_8x3VL/generic.cpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2019-2021, 2023 Arm Limited. + * Copyright (c) 2019-2021, 2023-2024 Arm Limited. * * SPDX-License-Identifier: MIT * diff --git a/src/core/NEON/kernels/arm_gemm/kernels/sve_interleaved_u8u32_mmla_8x3VL.hpp b/src/core/NEON/kernels/arm_gemm/kernels/sve_interleaved_u8u32_mmla_8x3VL.hpp index 067d0bf258..3bbf2bbfe4 100644 --- a/src/core/NEON/kernels/arm_gemm/kernels/sve_interleaved_u8u32_mmla_8x3VL.hpp +++ b/src/core/NEON/kernels/arm_gemm/kernels/sve_interleaved_u8u32_mmla_8x3VL.hpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2019-2021, 2023 Arm Limited. + * Copyright (c) 2019-2021, 2023-2024 Arm Limited. 
* * SPDX-License-Identifier: MIT * diff --git a/src/core/NEON/kernels/arm_gemm/kernels/sve_interleaved_u8u32_mmla_8x3VL/generic.cpp b/src/core/NEON/kernels/arm_gemm/kernels/sve_interleaved_u8u32_mmla_8x3VL/generic.cpp index 28449ea99b..c66026104d 100644 --- a/src/core/NEON/kernels/arm_gemm/kernels/sve_interleaved_u8u32_mmla_8x3VL/generic.cpp +++ b/src/core/NEON/kernels/arm_gemm/kernels/sve_interleaved_u8u32_mmla_8x3VL/generic.cpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2019-2021, 2023 Arm Limited. + * Copyright (c) 2019-2021, 2023-2024 Arm Limited. * * SPDX-License-Identifier: MIT * diff --git a/src/core/NEON/kernels/arm_gemm/performance_parameters.hpp b/src/core/NEON/kernels/arm_gemm/performance_parameters.hpp index 059ab5f7df..ea00cccae5 100644 --- a/src/core/NEON/kernels/arm_gemm/performance_parameters.hpp +++ b/src/core/NEON/kernels/arm_gemm/performance_parameters.hpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2020 Arm Limited. + * Copyright (c) 2020, 2024 Arm Limited. * * SPDX-License-Identifier: MIT * diff --git a/src/core/NEON/kernels/arm_gemm/quantized.cpp b/src/core/NEON/kernels/arm_gemm/quantized.cpp index 6da9f4be0e..baf692d8d0 100644 --- a/src/core/NEON/kernels/arm_gemm/quantized.cpp +++ b/src/core/NEON/kernels/arm_gemm/quantized.cpp @@ -966,7 +966,7 @@ void compute_row_sums(const Requantize32 &qp, unsigned int width, unsigned int h const unsigned int odds = width % 16; /* Generate a mask to use on the last iteration, if necessary. 
*/ - uint64x2_t mask; + uint64x2_t mask = vdupq_n_u64(0); unsigned int mask_mode = 0; if (odds > 0 && odds <= 8) { diff --git a/src/core/NEON/kernels/arm_gemm/transforms/a32_transpose_interleave_8way_32bit.hpp b/src/core/NEON/kernels/arm_gemm/transforms/a32_transpose_interleave_8way_32bit.hpp index b50c240a3a..16e0822782 100644 --- a/src/core/NEON/kernels/arm_gemm/transforms/a32_transpose_interleave_8way_32bit.hpp +++ b/src/core/NEON/kernels/arm_gemm/transforms/a32_transpose_interleave_8way_32bit.hpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2017-2021 Arm Limited. + * Copyright (c) 2017-2021, 2024 Arm Limited. * * SPDX-License-Identifier: MIT * diff --git a/src/core/NEON/kernels/arm_gemm/transforms/a64_transpose_interleave_128.hpp b/src/core/NEON/kernels/arm_gemm/transforms/a64_transpose_interleave_128.hpp index 8574d89226..af5ecf5a8b 100644 --- a/src/core/NEON/kernels/arm_gemm/transforms/a64_transpose_interleave_128.hpp +++ b/src/core/NEON/kernels/arm_gemm/transforms/a64_transpose_interleave_128.hpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2021, 2023 Arm Limited. + * Copyright (c) 2021, 2023-2024 Arm Limited. * * SPDX-License-Identifier: MIT * diff --git a/src/core/NEON/kernels/arm_gemm/transforms/a64_transpose_interleave_12_1x4.hpp b/src/core/NEON/kernels/arm_gemm/transforms/a64_transpose_interleave_12_1x4.hpp index cdf1f98608..ddd426e949 100644 --- a/src/core/NEON/kernels/arm_gemm/transforms/a64_transpose_interleave_12_1x4.hpp +++ b/src/core/NEON/kernels/arm_gemm/transforms/a64_transpose_interleave_12_1x4.hpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2021, 2023 Arm Limited. + * Copyright (c) 2021, 2023-2024 Arm Limited. 
* * SPDX-License-Identifier: MIT * diff --git a/src/core/NEON/kernels/arm_gemm/transforms/a64_transpose_interleave_12_1x8.hpp b/src/core/NEON/kernels/arm_gemm/transforms/a64_transpose_interleave_12_1x8.hpp index da0809d4d6..4e76689523 100644 --- a/src/core/NEON/kernels/arm_gemm/transforms/a64_transpose_interleave_12_1x8.hpp +++ b/src/core/NEON/kernels/arm_gemm/transforms/a64_transpose_interleave_12_1x8.hpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2021, 2023 Arm Limited. + * Copyright (c) 2021, 2023-2024 Arm Limited. * * SPDX-License-Identifier: MIT * diff --git a/src/core/NEON/kernels/arm_gemm/transforms/a64_transpose_interleave_12_2x2.hpp b/src/core/NEON/kernels/arm_gemm/transforms/a64_transpose_interleave_12_2x2.hpp index cef468e9cc..eafa06ece1 100644 --- a/src/core/NEON/kernels/arm_gemm/transforms/a64_transpose_interleave_12_2x2.hpp +++ b/src/core/NEON/kernels/arm_gemm/transforms/a64_transpose_interleave_12_2x2.hpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2021, 2023 Arm Limited. + * Copyright (c) 2021, 2023-2024 Arm Limited. * * SPDX-License-Identifier: MIT * diff --git a/src/core/NEON/kernels/arm_gemm/transforms/a64_transpose_interleave_12_2x4.hpp b/src/core/NEON/kernels/arm_gemm/transforms/a64_transpose_interleave_12_2x4.hpp index 4c02d0534d..67493393a0 100644 --- a/src/core/NEON/kernels/arm_gemm/transforms/a64_transpose_interleave_12_2x4.hpp +++ b/src/core/NEON/kernels/arm_gemm/transforms/a64_transpose_interleave_12_2x4.hpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2021, 2023 Arm Limited. + * Copyright (c) 2021, 2023-2024 Arm Limited. 
* * SPDX-License-Identifier: MIT * diff --git a/src/core/NEON/kernels/arm_gemm/transforms/a64_transpose_interleave_12_2x4_fp32bf16.hpp b/src/core/NEON/kernels/arm_gemm/transforms/a64_transpose_interleave_12_2x4_fp32bf16.hpp index 2a3208d18d..fe554a65f9 100644 --- a/src/core/NEON/kernels/arm_gemm/transforms/a64_transpose_interleave_12_2x4_fp32bf16.hpp +++ b/src/core/NEON/kernels/arm_gemm/transforms/a64_transpose_interleave_12_2x4_fp32bf16.hpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2021, 2023 Arm Limited. + * Copyright (c) 2021, 2023-2024 Arm Limited. * * SPDX-License-Identifier: MIT * diff --git a/src/core/NEON/kernels/arm_gemm/transforms/a64_transpose_interleave_12_s8s16.hpp b/src/core/NEON/kernels/arm_gemm/transforms/a64_transpose_interleave_12_s8s16.hpp index 4d9d5e7f43..0d6f8b1cd4 100644 --- a/src/core/NEON/kernels/arm_gemm/transforms/a64_transpose_interleave_12_s8s16.hpp +++ b/src/core/NEON/kernels/arm_gemm/transforms/a64_transpose_interleave_12_s8s16.hpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2021, 2023 Arm Limited. + * Copyright (c) 2021, 2023-2024 Arm Limited. * * SPDX-License-Identifier: MIT * diff --git a/src/core/NEON/kernels/arm_gemm/transforms/a64_transpose_interleave_12_u8u16.hpp b/src/core/NEON/kernels/arm_gemm/transforms/a64_transpose_interleave_12_u8u16.hpp index b0cd7e4ef7..b263e6c41d 100644 --- a/src/core/NEON/kernels/arm_gemm/transforms/a64_transpose_interleave_12_u8u16.hpp +++ b/src/core/NEON/kernels/arm_gemm/transforms/a64_transpose_interleave_12_u8u16.hpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2021, 2023 Arm Limited. + * Copyright (c) 2021, 2023-2024 Arm Limited. 
* * SPDX-License-Identifier: MIT * diff --git a/src/core/NEON/kernels/arm_gemm/transforms/a64_transpose_interleave_16.hpp b/src/core/NEON/kernels/arm_gemm/transforms/a64_transpose_interleave_16.hpp index 0399f8becc..e798793759 100644 --- a/src/core/NEON/kernels/arm_gemm/transforms/a64_transpose_interleave_16.hpp +++ b/src/core/NEON/kernels/arm_gemm/transforms/a64_transpose_interleave_16.hpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2021-2023 Arm Limited. + * Copyright (c) 2021-2024 Arm Limited. * * SPDX-License-Identifier: MIT * @@ -10,16 +10,16 @@ * sell copies of the Software, and to permit persons to whom the Software is * furnished to do so, subject to the following conditions: * - * The above copyright notice and this permission notice shall be included in - * all copies or substantial portions of the Software. + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software. * * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING - * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS - * IN THE SOFTWARE. + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. 
*/ #pragma once diff --git a/src/core/NEON/kernels/arm_gemm/transforms/a64_transpose_interleave_16_1x4.hpp b/src/core/NEON/kernels/arm_gemm/transforms/a64_transpose_interleave_16_1x4.hpp index f3a1dde73f..087dc923e8 100644 --- a/src/core/NEON/kernels/arm_gemm/transforms/a64_transpose_interleave_16_1x4.hpp +++ b/src/core/NEON/kernels/arm_gemm/transforms/a64_transpose_interleave_16_1x4.hpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2021, 2023 Arm Limited. + * Copyright (c) 2021, 2023-2024 Arm Limited. * * SPDX-License-Identifier: MIT * diff --git a/src/core/NEON/kernels/arm_gemm/transforms/a64_transpose_interleave_16_1x8.hpp b/src/core/NEON/kernels/arm_gemm/transforms/a64_transpose_interleave_16_1x8.hpp index 7c7e91e666..93c95048a8 100644 --- a/src/core/NEON/kernels/arm_gemm/transforms/a64_transpose_interleave_16_1x8.hpp +++ b/src/core/NEON/kernels/arm_gemm/transforms/a64_transpose_interleave_16_1x8.hpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2021, 2023 Arm Limited. + * Copyright (c) 2021, 2023-2024 Arm Limited. * * SPDX-License-Identifier: MIT * diff --git a/src/core/NEON/kernels/arm_gemm/transforms/a64_transpose_interleave_16_2x2.hpp b/src/core/NEON/kernels/arm_gemm/transforms/a64_transpose_interleave_16_2x2.hpp index b4515cbfd4..b1efe81b35 100644 --- a/src/core/NEON/kernels/arm_gemm/transforms/a64_transpose_interleave_16_2x2.hpp +++ b/src/core/NEON/kernels/arm_gemm/transforms/a64_transpose_interleave_16_2x2.hpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2021, 2023 Arm Limited. + * Copyright (c) 2021, 2023-2024 Arm Limited. 
* * SPDX-License-Identifier: MIT * diff --git a/src/core/NEON/kernels/arm_gemm/transforms/a64_transpose_interleave_16_2x4.hpp b/src/core/NEON/kernels/arm_gemm/transforms/a64_transpose_interleave_16_2x4.hpp index ac67467240..02ae1ade30 100644 --- a/src/core/NEON/kernels/arm_gemm/transforms/a64_transpose_interleave_16_2x4.hpp +++ b/src/core/NEON/kernels/arm_gemm/transforms/a64_transpose_interleave_16_2x4.hpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2021, 2023 Arm Limited. + * Copyright (c) 2021, 2023-2024 Arm Limited. * * SPDX-License-Identifier: MIT * diff --git a/src/core/NEON/kernels/arm_gemm/transforms/a64_transpose_interleave_16_2x4_fp32bf16.hpp b/src/core/NEON/kernels/arm_gemm/transforms/a64_transpose_interleave_16_2x4_fp32bf16.hpp index b9fe8b126a..435398da0b 100644 --- a/src/core/NEON/kernels/arm_gemm/transforms/a64_transpose_interleave_16_2x4_fp32bf16.hpp +++ b/src/core/NEON/kernels/arm_gemm/transforms/a64_transpose_interleave_16_2x4_fp32bf16.hpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2021, 2023 Arm Limited. + * Copyright (c) 2021, 2023-2024 Arm Limited. * * SPDX-License-Identifier: MIT * diff --git a/src/core/NEON/kernels/arm_gemm/transforms/a64_transpose_interleave_24.hpp b/src/core/NEON/kernels/arm_gemm/transforms/a64_transpose_interleave_24.hpp index 46211ad4e4..a49acf1449 100644 --- a/src/core/NEON/kernels/arm_gemm/transforms/a64_transpose_interleave_24.hpp +++ b/src/core/NEON/kernels/arm_gemm/transforms/a64_transpose_interleave_24.hpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2021, 2023 Arm Limited. + * Copyright (c) 2021, 2023-2024 Arm Limited. 
* * SPDX-License-Identifier: MIT * diff --git a/src/core/NEON/kernels/arm_gemm/transforms/a64_transpose_interleave_24_2x4_fp32bf16.hpp b/src/core/NEON/kernels/arm_gemm/transforms/a64_transpose_interleave_24_2x4_fp32bf16.hpp index 1cb7bc4445..d8edd806eb 100644 --- a/src/core/NEON/kernels/arm_gemm/transforms/a64_transpose_interleave_24_2x4_fp32bf16.hpp +++ b/src/core/NEON/kernels/arm_gemm/transforms/a64_transpose_interleave_24_2x4_fp32bf16.hpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2021, 2023 Arm Limited. + * Copyright (c) 2021, 2023-2024 Arm Limited. * * SPDX-License-Identifier: MIT * diff --git a/src/core/NEON/kernels/arm_gemm/transforms/a64_transpose_interleave_24_bf16fp32.hpp b/src/core/NEON/kernels/arm_gemm/transforms/a64_transpose_interleave_24_bf16fp32.hpp index dcaf69d2a8..e2884ef80b 100644 --- a/src/core/NEON/kernels/arm_gemm/transforms/a64_transpose_interleave_24_bf16fp32.hpp +++ b/src/core/NEON/kernels/arm_gemm/transforms/a64_transpose_interleave_24_bf16fp32.hpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2021, 2023 Arm Limited. + * Copyright (c) 2021, 2023-2024 Arm Limited. * * SPDX-License-Identifier: MIT * diff --git a/src/core/NEON/kernels/arm_gemm/transforms/a64_transpose_interleave_24_fp16fp32.hpp b/src/core/NEON/kernels/arm_gemm/transforms/a64_transpose_interleave_24_fp16fp32.hpp index 966b75664e..5e64f812e1 100644 --- a/src/core/NEON/kernels/arm_gemm/transforms/a64_transpose_interleave_24_fp16fp32.hpp +++ b/src/core/NEON/kernels/arm_gemm/transforms/a64_transpose_interleave_24_fp16fp32.hpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2021, 2023 Arm Limited. + * Copyright (c) 2021, 2023-2024 Arm Limited. 
* * SPDX-License-Identifier: MIT * diff --git a/src/core/NEON/kernels/arm_gemm/transforms/a64_transpose_interleave_32_1x4.hpp b/src/core/NEON/kernels/arm_gemm/transforms/a64_transpose_interleave_32_1x4.hpp index 4a22675028..918d3ffaa3 100644 --- a/src/core/NEON/kernels/arm_gemm/transforms/a64_transpose_interleave_32_1x4.hpp +++ b/src/core/NEON/kernels/arm_gemm/transforms/a64_transpose_interleave_32_1x4.hpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2021, 2023 Arm Limited. + * Copyright (c) 2021, 2023-2024 Arm Limited. * * SPDX-License-Identifier: MIT * diff --git a/src/core/NEON/kernels/arm_gemm/transforms/a64_transpose_interleave_32_2x2.hpp b/src/core/NEON/kernels/arm_gemm/transforms/a64_transpose_interleave_32_2x2.hpp index 237536697c..747d4538bd 100644 --- a/src/core/NEON/kernels/arm_gemm/transforms/a64_transpose_interleave_32_2x2.hpp +++ b/src/core/NEON/kernels/arm_gemm/transforms/a64_transpose_interleave_32_2x2.hpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2021, 2023 Arm Limited. + * Copyright (c) 2021, 2023-2024 Arm Limited. * * SPDX-License-Identifier: MIT * diff --git a/src/core/NEON/kernels/arm_gemm/transforms/a64_transpose_interleave_48.hpp b/src/core/NEON/kernels/arm_gemm/transforms/a64_transpose_interleave_48.hpp index f35752d5a8..222551909b 100644 --- a/src/core/NEON/kernels/arm_gemm/transforms/a64_transpose_interleave_48.hpp +++ b/src/core/NEON/kernels/arm_gemm/transforms/a64_transpose_interleave_48.hpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2021, 2023 Arm Limited. + * Copyright (c) 2021, 2023-2024 Arm Limited. 
* * SPDX-License-Identifier: MIT * diff --git a/src/core/NEON/kernels/arm_gemm/transforms/a64_transpose_interleave_4_1x16.hpp b/src/core/NEON/kernels/arm_gemm/transforms/a64_transpose_interleave_4_1x16.hpp index 6ef02ac044..7b9c7ecb30 100644 --- a/src/core/NEON/kernels/arm_gemm/transforms/a64_transpose_interleave_4_1x16.hpp +++ b/src/core/NEON/kernels/arm_gemm/transforms/a64_transpose_interleave_4_1x16.hpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2021, 2023 Arm Limited. + * Copyright (c) 2021, 2023-2024 Arm Limited. * * SPDX-License-Identifier: MIT * diff --git a/src/core/NEON/kernels/arm_gemm/transforms/a64_transpose_interleave_4_1x4.hpp b/src/core/NEON/kernels/arm_gemm/transforms/a64_transpose_interleave_4_1x4.hpp index 5667820865..94a4b5d07f 100644 --- a/src/core/NEON/kernels/arm_gemm/transforms/a64_transpose_interleave_4_1x4.hpp +++ b/src/core/NEON/kernels/arm_gemm/transforms/a64_transpose_interleave_4_1x4.hpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2021, 2023 Arm Limited. + * Copyright (c) 2021, 2023-2024 Arm Limited. * * SPDX-License-Identifier: MIT * diff --git a/src/core/NEON/kernels/arm_gemm/transforms/a64_transpose_interleave_64.hpp b/src/core/NEON/kernels/arm_gemm/transforms/a64_transpose_interleave_64.hpp index 328274a488..03b134e422 100644 --- a/src/core/NEON/kernels/arm_gemm/transforms/a64_transpose_interleave_64.hpp +++ b/src/core/NEON/kernels/arm_gemm/transforms/a64_transpose_interleave_64.hpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2021, 2023 Arm Limited. + * Copyright (c) 2021, 2023-2024 Arm Limited. * * SPDX-License-Identifier: MIT * diff --git a/src/core/NEON/kernels/arm_gemm/transforms/a64_transpose_interleave_96.hpp b/src/core/NEON/kernels/arm_gemm/transforms/a64_transpose_interleave_96.hpp index feb469ab0e..2719d24750 100644 --- a/src/core/NEON/kernels/arm_gemm/transforms/a64_transpose_interleave_96.hpp +++ b/src/core/NEON/kernels/arm_gemm/transforms/a64_transpose_interleave_96.hpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2021, 2023 Arm Limited. 
+ * Copyright (c) 2021, 2023-2024 Arm Limited. * * SPDX-License-Identifier: MIT * diff --git a/src/core/NEON/kernels/arm_gemm/transforms/sme_transpose_interleave_16VL.hpp b/src/core/NEON/kernels/arm_gemm/transforms/sme_transpose_interleave_16VL.hpp index a4d480c405..768719b0de 100644 --- a/src/core/NEON/kernels/arm_gemm/transforms/sme_transpose_interleave_16VL.hpp +++ b/src/core/NEON/kernels/arm_gemm/transforms/sme_transpose_interleave_16VL.hpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2022-2023 Arm Limited. + * Copyright (c) 2022-2024 Arm Limited. * * SPDX-License-Identifier: MIT * diff --git a/src/core/NEON/kernels/arm_gemm/transforms/sme_transpose_interleave_16VL_1x4.hpp b/src/core/NEON/kernels/arm_gemm/transforms/sme_transpose_interleave_16VL_1x4.hpp index 552abfc1c6..bb866b2983 100644 --- a/src/core/NEON/kernels/arm_gemm/transforms/sme_transpose_interleave_16VL_1x4.hpp +++ b/src/core/NEON/kernels/arm_gemm/transforms/sme_transpose_interleave_16VL_1x4.hpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2022-2023 Arm Limited. + * Copyright (c) 2022-2024 Arm Limited. * * SPDX-License-Identifier: MIT * diff --git a/src/core/NEON/kernels/arm_gemm/transforms/sme_transpose_interleave_16VL_2x2_fp32bf16.hpp b/src/core/NEON/kernels/arm_gemm/transforms/sme_transpose_interleave_16VL_2x2_fp32bf16.hpp index 2756327815..0e34bf143b 100644 --- a/src/core/NEON/kernels/arm_gemm/transforms/sme_transpose_interleave_16VL_2x2_fp32bf16.hpp +++ b/src/core/NEON/kernels/arm_gemm/transforms/sme_transpose_interleave_16VL_2x2_fp32bf16.hpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2022-2023 Arm Limited. + * Copyright (c) 2022-2024 Arm Limited. 
* * SPDX-License-Identifier: MIT * diff --git a/src/core/NEON/kernels/arm_gemm/transforms/sme_transpose_interleave_1VL.hpp b/src/core/NEON/kernels/arm_gemm/transforms/sme_transpose_interleave_1VL.hpp index a6ddb8fec0..36b364a57b 100644 --- a/src/core/NEON/kernels/arm_gemm/transforms/sme_transpose_interleave_1VL.hpp +++ b/src/core/NEON/kernels/arm_gemm/transforms/sme_transpose_interleave_1VL.hpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2022-2023 Arm Limited. + * Copyright (c) 2022-2024 Arm Limited. * * SPDX-License-Identifier: MIT * diff --git a/src/core/NEON/kernels/arm_gemm/transforms/sme_transpose_interleave_1VL_1x4.hpp b/src/core/NEON/kernels/arm_gemm/transforms/sme_transpose_interleave_1VL_1x4.hpp index 399a52e233..d67e353f18 100644 --- a/src/core/NEON/kernels/arm_gemm/transforms/sme_transpose_interleave_1VL_1x4.hpp +++ b/src/core/NEON/kernels/arm_gemm/transforms/sme_transpose_interleave_1VL_1x4.hpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2022-2023 Arm Limited. + * Copyright (c) 2022-2024 Arm Limited. * * SPDX-License-Identifier: MIT * diff --git a/src/core/NEON/kernels/arm_gemm/transforms/sme_transpose_interleave_1VL_2x2.hpp b/src/core/NEON/kernels/arm_gemm/transforms/sme_transpose_interleave_1VL_2x2.hpp index 6318e29a79..f8980d25f6 100644 --- a/src/core/NEON/kernels/arm_gemm/transforms/sme_transpose_interleave_1VL_2x2.hpp +++ b/src/core/NEON/kernels/arm_gemm/transforms/sme_transpose_interleave_1VL_2x2.hpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2022-2023 Arm Limited. + * Copyright (c) 2022-2024 Arm Limited. 
* * SPDX-License-Identifier: MIT * diff --git a/src/core/NEON/kernels/arm_gemm/transforms/sme_transpose_interleave_1VL_2x2_fp32bf16.hpp b/src/core/NEON/kernels/arm_gemm/transforms/sme_transpose_interleave_1VL_2x2_fp32bf16.hpp index b90063028d..c740a9c64b 100644 --- a/src/core/NEON/kernels/arm_gemm/transforms/sme_transpose_interleave_1VL_2x2_fp32bf16.hpp +++ b/src/core/NEON/kernels/arm_gemm/transforms/sme_transpose_interleave_1VL_2x2_fp32bf16.hpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2022-2023 Arm Limited. + * Copyright (c) 2022-2024 Arm Limited. * * SPDX-License-Identifier: MIT * diff --git a/src/core/NEON/kernels/arm_gemm/transforms/sme_transpose_interleave_2VL.hpp b/src/core/NEON/kernels/arm_gemm/transforms/sme_transpose_interleave_2VL.hpp index f827197ab7..f7d29a9f01 100644 --- a/src/core/NEON/kernels/arm_gemm/transforms/sme_transpose_interleave_2VL.hpp +++ b/src/core/NEON/kernels/arm_gemm/transforms/sme_transpose_interleave_2VL.hpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2022-2023 Arm Limited. + * Copyright (c) 2022-2024 Arm Limited. * * SPDX-License-Identifier: MIT * diff --git a/src/core/NEON/kernels/arm_gemm/transforms/sme_transpose_interleave_2VL_1x4.hpp b/src/core/NEON/kernels/arm_gemm/transforms/sme_transpose_interleave_2VL_1x4.hpp index c471d66e17..f07d34f46c 100644 --- a/src/core/NEON/kernels/arm_gemm/transforms/sme_transpose_interleave_2VL_1x4.hpp +++ b/src/core/NEON/kernels/arm_gemm/transforms/sme_transpose_interleave_2VL_1x4.hpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2022-2023 Arm Limited. + * Copyright (c) 2022-2024 Arm Limited. 
* * SPDX-License-Identifier: MIT * diff --git a/src/core/NEON/kernels/arm_gemm/transforms/sme_transpose_interleave_2VL_2x2.hpp b/src/core/NEON/kernels/arm_gemm/transforms/sme_transpose_interleave_2VL_2x2.hpp index 5f967fa615..35d74e727b 100644 --- a/src/core/NEON/kernels/arm_gemm/transforms/sme_transpose_interleave_2VL_2x2.hpp +++ b/src/core/NEON/kernels/arm_gemm/transforms/sme_transpose_interleave_2VL_2x2.hpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2022-2023 Arm Limited. + * Copyright (c) 2022-2024 Arm Limited. * * SPDX-License-Identifier: MIT * diff --git a/src/core/NEON/kernels/arm_gemm/transforms/sme_transpose_interleave_2VL_2x2_fp32bf16.hpp b/src/core/NEON/kernels/arm_gemm/transforms/sme_transpose_interleave_2VL_2x2_fp32bf16.hpp index f22b833821..284216a337 100644 --- a/src/core/NEON/kernels/arm_gemm/transforms/sme_transpose_interleave_2VL_2x2_fp32bf16.hpp +++ b/src/core/NEON/kernels/arm_gemm/transforms/sme_transpose_interleave_2VL_2x2_fp32bf16.hpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2022-2023 Arm Limited. + * Copyright (c) 2022-2024 Arm Limited. * * SPDX-License-Identifier: MIT * diff --git a/src/core/NEON/kernels/arm_gemm/transforms/sme_transpose_interleave_4VL.hpp b/src/core/NEON/kernels/arm_gemm/transforms/sme_transpose_interleave_4VL.hpp index 14636e3218..9677ea2016 100644 --- a/src/core/NEON/kernels/arm_gemm/transforms/sme_transpose_interleave_4VL.hpp +++ b/src/core/NEON/kernels/arm_gemm/transforms/sme_transpose_interleave_4VL.hpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2022-2023 Arm Limited. + * Copyright (c) 2022-2024 Arm Limited. 
* * SPDX-License-Identifier: MIT * diff --git a/src/core/NEON/kernels/arm_gemm/transforms/sme_transpose_interleave_4VL_1x4.hpp b/src/core/NEON/kernels/arm_gemm/transforms/sme_transpose_interleave_4VL_1x4.hpp index 2d46a481f3..94d1c0840a 100644 --- a/src/core/NEON/kernels/arm_gemm/transforms/sme_transpose_interleave_4VL_1x4.hpp +++ b/src/core/NEON/kernels/arm_gemm/transforms/sme_transpose_interleave_4VL_1x4.hpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2022-2023 Arm Limited. + * Copyright (c) 2022-2024 Arm Limited. * * SPDX-License-Identifier: MIT * diff --git a/src/core/NEON/kernels/arm_gemm/transforms/sme_transpose_interleave_4VL_2x2.hpp b/src/core/NEON/kernels/arm_gemm/transforms/sme_transpose_interleave_4VL_2x2.hpp index 002a12479a..4327466387 100644 --- a/src/core/NEON/kernels/arm_gemm/transforms/sme_transpose_interleave_4VL_2x2.hpp +++ b/src/core/NEON/kernels/arm_gemm/transforms/sme_transpose_interleave_4VL_2x2.hpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2022-2023 Arm Limited. + * Copyright (c) 2022-2024 Arm Limited. * * SPDX-License-Identifier: MIT * diff --git a/src/core/NEON/kernels/arm_gemm/transforms/sme_transpose_interleave_4VL_2x2_fp32bf16.hpp b/src/core/NEON/kernels/arm_gemm/transforms/sme_transpose_interleave_4VL_2x2_fp32bf16.hpp index 2a43f34f71..1c97bed317 100644 --- a/src/core/NEON/kernels/arm_gemm/transforms/sme_transpose_interleave_4VL_2x2_fp32bf16.hpp +++ b/src/core/NEON/kernels/arm_gemm/transforms/sme_transpose_interleave_4VL_2x2_fp32bf16.hpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2022-2023 Arm Limited. + * Copyright (c) 2022-2024 Arm Limited. 
* * SPDX-License-Identifier: MIT * diff --git a/src/core/NEON/kernels/arm_gemm/transforms/sme_transpose_interleave_8VL.hpp b/src/core/NEON/kernels/arm_gemm/transforms/sme_transpose_interleave_8VL.hpp index be9ad666a9..98e8bb20a5 100644 --- a/src/core/NEON/kernels/arm_gemm/transforms/sme_transpose_interleave_8VL.hpp +++ b/src/core/NEON/kernels/arm_gemm/transforms/sme_transpose_interleave_8VL.hpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2023 Arm Limited. + * Copyright (c) 2023-2024 Arm Limited. * * SPDX-License-Identifier: MIT * diff --git a/src/core/NEON/kernels/arm_gemm/transforms/sme_transpose_interleave_8VL_1x4.hpp b/src/core/NEON/kernels/arm_gemm/transforms/sme_transpose_interleave_8VL_1x4.hpp index 45d2e24258..5d9c123835 100644 --- a/src/core/NEON/kernels/arm_gemm/transforms/sme_transpose_interleave_8VL_1x4.hpp +++ b/src/core/NEON/kernels/arm_gemm/transforms/sme_transpose_interleave_8VL_1x4.hpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2023 Arm Limited. + * Copyright (c) 2023-2024 Arm Limited. * * SPDX-License-Identifier: MIT * diff --git a/src/core/NEON/kernels/arm_gemm/transforms/sme_transpose_interleave_8VL_2x2.hpp b/src/core/NEON/kernels/arm_gemm/transforms/sme_transpose_interleave_8VL_2x2.hpp index ec7c415e27..9e1b2dca3e 100644 --- a/src/core/NEON/kernels/arm_gemm/transforms/sme_transpose_interleave_8VL_2x2.hpp +++ b/src/core/NEON/kernels/arm_gemm/transforms/sme_transpose_interleave_8VL_2x2.hpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2023 Arm Limited. + * Copyright (c) 2023-2024 Arm Limited. 
* * SPDX-License-Identifier: MIT * diff --git a/src/core/NEON/kernels/arm_gemm/transforms/sve_transpose_interleave_12VL_2x4_fp32bf16.hpp b/src/core/NEON/kernels/arm_gemm/transforms/sve_transpose_interleave_12VL_2x4_fp32bf16.hpp index f627fe575f..7e9b40b0d0 100644 --- a/src/core/NEON/kernels/arm_gemm/transforms/sve_transpose_interleave_12VL_2x4_fp32bf16.hpp +++ b/src/core/NEON/kernels/arm_gemm/transforms/sve_transpose_interleave_12VL_2x4_fp32bf16.hpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2021, 2023 Arm Limited. + * Copyright (c) 2021, 2023-2024 Arm Limited. * * SPDX-License-Identifier: MIT * diff --git a/src/core/NEON/kernels/arm_gemm/transforms/sve_transpose_interleave_1VL.hpp b/src/core/NEON/kernels/arm_gemm/transforms/sve_transpose_interleave_1VL.hpp index b33c4f6c2d..beddf76c5b 100644 --- a/src/core/NEON/kernels/arm_gemm/transforms/sve_transpose_interleave_1VL.hpp +++ b/src/core/NEON/kernels/arm_gemm/transforms/sve_transpose_interleave_1VL.hpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2021, 2023 Arm Limited. + * Copyright (c) 2021, 2023-2024 Arm Limited. * * SPDX-License-Identifier: MIT * diff --git a/src/core/NEON/kernels/arm_gemm/transforms/sve_transpose_interleave_1VL_1x4.hpp b/src/core/NEON/kernels/arm_gemm/transforms/sve_transpose_interleave_1VL_1x4.hpp index e468787815..1103008fe2 100644 --- a/src/core/NEON/kernels/arm_gemm/transforms/sve_transpose_interleave_1VL_1x4.hpp +++ b/src/core/NEON/kernels/arm_gemm/transforms/sve_transpose_interleave_1VL_1x4.hpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2021, 2023 Arm Limited. + * Copyright (c) 2021, 2023-2024 Arm Limited. 
* * SPDX-License-Identifier: MIT * diff --git a/src/core/NEON/kernels/arm_gemm/transforms/sve_transpose_interleave_3VL.hpp b/src/core/NEON/kernels/arm_gemm/transforms/sve_transpose_interleave_3VL.hpp index 546800fa69..0e138e4422 100644 --- a/src/core/NEON/kernels/arm_gemm/transforms/sve_transpose_interleave_3VL.hpp +++ b/src/core/NEON/kernels/arm_gemm/transforms/sve_transpose_interleave_3VL.hpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2021, 2023 Arm Limited. + * Copyright (c) 2021, 2023-2024 Arm Limited. * * SPDX-License-Identifier: MIT * diff --git a/src/core/NEON/kernels/arm_gemm/transforms/sve_transpose_interleave_3VL_1x4.hpp b/src/core/NEON/kernels/arm_gemm/transforms/sve_transpose_interleave_3VL_1x4.hpp index a44141c109..bc462414be 100644 --- a/src/core/NEON/kernels/arm_gemm/transforms/sve_transpose_interleave_3VL_1x4.hpp +++ b/src/core/NEON/kernels/arm_gemm/transforms/sve_transpose_interleave_3VL_1x4.hpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2021, 2023 Arm Limited. + * Copyright (c) 2021, 2023-2024 Arm Limited. * * SPDX-License-Identifier: MIT * diff --git a/src/core/NEON/kernels/arm_gemm/transforms/sve_transpose_interleave_3VL_2x2.hpp b/src/core/NEON/kernels/arm_gemm/transforms/sve_transpose_interleave_3VL_2x2.hpp index 36a15a16b3..5cf7139fe4 100644 --- a/src/core/NEON/kernels/arm_gemm/transforms/sve_transpose_interleave_3VL_2x2.hpp +++ b/src/core/NEON/kernels/arm_gemm/transforms/sve_transpose_interleave_3VL_2x2.hpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2021, 2023 Arm Limited. + * Copyright (c) 2021, 2023-2024 Arm Limited. 
* * SPDX-License-Identifier: MIT * diff --git a/src/core/NEON/kernels/arm_gemm/transforms/sve_transpose_interleave_4VL.hpp b/src/core/NEON/kernels/arm_gemm/transforms/sve_transpose_interleave_4VL.hpp index e661e2698a..ae2ae8c310 100644 --- a/src/core/NEON/kernels/arm_gemm/transforms/sve_transpose_interleave_4VL.hpp +++ b/src/core/NEON/kernels/arm_gemm/transforms/sve_transpose_interleave_4VL.hpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2021, 2023 Arm Limited. + * Copyright (c) 2021, 2023-2024 Arm Limited. * * SPDX-License-Identifier: MIT * diff --git a/src/core/NEON/kernels/arm_gemm/transforms/sve_transpose_interleave_4VL_1x4.hpp b/src/core/NEON/kernels/arm_gemm/transforms/sve_transpose_interleave_4VL_1x4.hpp index 03a78f72f1..e87c602b54 100644 --- a/src/core/NEON/kernels/arm_gemm/transforms/sve_transpose_interleave_4VL_1x4.hpp +++ b/src/core/NEON/kernels/arm_gemm/transforms/sve_transpose_interleave_4VL_1x4.hpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2021, 2023 Arm Limited. + * Copyright (c) 2021, 2023-2024 Arm Limited. * * SPDX-License-Identifier: MIT * diff --git a/src/core/NEON/kernels/arm_gemm/transforms/sve_transpose_interleave_4VL_2x2.hpp b/src/core/NEON/kernels/arm_gemm/transforms/sve_transpose_interleave_4VL_2x2.hpp index b196799cfe..b753c22b67 100644 --- a/src/core/NEON/kernels/arm_gemm/transforms/sve_transpose_interleave_4VL_2x2.hpp +++ b/src/core/NEON/kernels/arm_gemm/transforms/sve_transpose_interleave_4VL_2x2.hpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2021, 2023 Arm Limited. + * Copyright (c) 2021, 2023-2024 Arm Limited. 
* * SPDX-License-Identifier: MIT * diff --git a/src/core/NEON/kernels/arm_gemm/transforms/sve_transpose_interleave_6VL_1x8.hpp b/src/core/NEON/kernels/arm_gemm/transforms/sve_transpose_interleave_6VL_1x8.hpp index 68fe2d0cbe..bba8f107d8 100644 --- a/src/core/NEON/kernels/arm_gemm/transforms/sve_transpose_interleave_6VL_1x8.hpp +++ b/src/core/NEON/kernels/arm_gemm/transforms/sve_transpose_interleave_6VL_1x8.hpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2021, 2023 Arm Limited. + * Copyright (c) 2021, 2023-2024 Arm Limited. * * SPDX-License-Identifier: MIT * diff --git a/src/core/NEON/kernels/arm_gemm/transforms/sve_transpose_interleave_6VL_2x4.hpp b/src/core/NEON/kernels/arm_gemm/transforms/sve_transpose_interleave_6VL_2x4.hpp index 910fc6cb02..ebfc65be34 100644 --- a/src/core/NEON/kernels/arm_gemm/transforms/sve_transpose_interleave_6VL_2x4.hpp +++ b/src/core/NEON/kernels/arm_gemm/transforms/sve_transpose_interleave_6VL_2x4.hpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2021, 2023 Arm Limited. + * Copyright (c) 2021, 2023-2024 Arm Limited. * * SPDX-License-Identifier: MIT * diff --git a/src/core/NEON/kernels/arm_gemm/transforms/sve_transpose_interleave_6VL_2x4_fp32bf16.hpp b/src/core/NEON/kernels/arm_gemm/transforms/sve_transpose_interleave_6VL_2x4_fp32bf16.hpp index f0f10d2f43..f23b9011d0 100644 --- a/src/core/NEON/kernels/arm_gemm/transforms/sve_transpose_interleave_6VL_2x4_fp32bf16.hpp +++ b/src/core/NEON/kernels/arm_gemm/transforms/sve_transpose_interleave_6VL_2x4_fp32bf16.hpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2021, 2023 Arm Limited. + * Copyright (c) 2021, 2023-2024 Arm Limited. 
* * SPDX-License-Identifier: MIT * diff --git a/src/core/NEON/kernels/arm_gemm/transforms/sve_transpose_interleave_6VL_4x2.hpp b/src/core/NEON/kernels/arm_gemm/transforms/sve_transpose_interleave_6VL_4x2.hpp index c638eaacde..ac61301ea4 100644 --- a/src/core/NEON/kernels/arm_gemm/transforms/sve_transpose_interleave_6VL_4x2.hpp +++ b/src/core/NEON/kernels/arm_gemm/transforms/sve_transpose_interleave_6VL_4x2.hpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2021, 2023 Arm Limited. + * Copyright (c) 2021, 2023-2024 Arm Limited. * * SPDX-License-Identifier: MIT * diff --git a/src/core/NEON/kernels/arm_gemm/transforms/sve_transpose_interleave_8VL.hpp b/src/core/NEON/kernels/arm_gemm/transforms/sve_transpose_interleave_8VL.hpp index 0526bd0596..87d5372b57 100644 --- a/src/core/NEON/kernels/arm_gemm/transforms/sve_transpose_interleave_8VL.hpp +++ b/src/core/NEON/kernels/arm_gemm/transforms/sve_transpose_interleave_8VL.hpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2021, 2023 Arm Limited. + * Copyright (c) 2021, 2023-2024 Arm Limited. * * SPDX-License-Identifier: MIT * diff --git a/src/core/NEON/kernels/arm_gemm/transforms/sve_transpose_interleave_8VL_1x4.hpp b/src/core/NEON/kernels/arm_gemm/transforms/sve_transpose_interleave_8VL_1x4.hpp index 98f0770d77..35e60a223c 100644 --- a/src/core/NEON/kernels/arm_gemm/transforms/sve_transpose_interleave_8VL_1x4.hpp +++ b/src/core/NEON/kernels/arm_gemm/transforms/sve_transpose_interleave_8VL_1x4.hpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2021, 2023 Arm Limited. + * Copyright (c) 2021, 2023-2024 Arm Limited. 
* * SPDX-License-Identifier: MIT * diff --git a/src/core/NEON/kernels/arm_gemm/transforms/sve_transpose_interleave_8VL_1x8.hpp b/src/core/NEON/kernels/arm_gemm/transforms/sve_transpose_interleave_8VL_1x8.hpp index 3fa5292143..3e20a5882e 100644 --- a/src/core/NEON/kernels/arm_gemm/transforms/sve_transpose_interleave_8VL_1x8.hpp +++ b/src/core/NEON/kernels/arm_gemm/transforms/sve_transpose_interleave_8VL_1x8.hpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2021, 2023 Arm Limited. + * Copyright (c) 2021, 2023-2024 Arm Limited. * * SPDX-License-Identifier: MIT * diff --git a/src/core/NEON/kernels/arm_gemm/transforms/sve_transpose_interleave_8VL_2x2.hpp b/src/core/NEON/kernels/arm_gemm/transforms/sve_transpose_interleave_8VL_2x2.hpp index 02977ecf1e..eb7312b2e3 100644 --- a/src/core/NEON/kernels/arm_gemm/transforms/sve_transpose_interleave_8VL_2x2.hpp +++ b/src/core/NEON/kernels/arm_gemm/transforms/sve_transpose_interleave_8VL_2x2.hpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2021, 2023 Arm Limited. + * Copyright (c) 2021, 2023-2024 Arm Limited. * * SPDX-License-Identifier: MIT * diff --git a/src/core/NEON/kernels/arm_gemm/transforms/sve_transpose_interleave_8VL_2x4.hpp b/src/core/NEON/kernels/arm_gemm/transforms/sve_transpose_interleave_8VL_2x4.hpp index 34799c60a6..1d2c0742ea 100644 --- a/src/core/NEON/kernels/arm_gemm/transforms/sve_transpose_interleave_8VL_2x4.hpp +++ b/src/core/NEON/kernels/arm_gemm/transforms/sve_transpose_interleave_8VL_2x4.hpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2021, 2023 Arm Limited. + * Copyright (c) 2021, 2023-2024 Arm Limited. 
* * SPDX-License-Identifier: MIT * diff --git a/src/core/NEON/kernels/arm_gemm/transforms/sve_transpose_interleave_8VL_2x4_fp32bf16.hpp b/src/core/NEON/kernels/arm_gemm/transforms/sve_transpose_interleave_8VL_2x4_fp32bf16.hpp index 5a48e579ae..ab3af6f88d 100644 --- a/src/core/NEON/kernels/arm_gemm/transforms/sve_transpose_interleave_8VL_2x4_fp32bf16.hpp +++ b/src/core/NEON/kernels/arm_gemm/transforms/sve_transpose_interleave_8VL_2x4_fp32bf16.hpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2021, 2023 Arm Limited. + * Copyright (c) 2021, 2023-2024 Arm Limited. * * SPDX-License-Identifier: MIT * diff --git a/src/core/NEON/kernels/arm_gemm/utils.hpp b/src/core/NEON/kernels/arm_gemm/utils.hpp index 11b1bd3e05..9d8e31870d 100644 --- a/src/core/NEON/kernels/arm_gemm/utils.hpp +++ b/src/core/NEON/kernels/arm_gemm/utils.hpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2017-2023 Arm Limited. + * Copyright (c) 2017-2024 Arm Limited. * * SPDX-License-Identifier: MIT * diff --git a/src/core/NEON/kernels/assembly/depthwise.hpp b/src/core/NEON/kernels/assembly/depthwise.hpp index 13c2d314e4..0b68cb4db8 100644 --- a/src/core/NEON/kernels/assembly/depthwise.hpp +++ b/src/core/NEON/kernels/assembly/depthwise.hpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2021-2023 Arm Limited. + * Copyright (c) 2021-2024 Arm Limited. * * SPDX-License-Identifier: MIT * @@ -22,6 +22,9 @@ * SOFTWARE. 
*/ +#ifndef ACL_SRC_CORE_NEON_KERNELS_ASSEMBLY_DEPTHWISE_HPP +#define ACL_SRC_CORE_NEON_KERNELS_ASSEMBLY_DEPTHWISE_HPP + #pragma once #include "arm_gemm.hpp" @@ -349,3 +352,5 @@ std::vector get_compatible_kernels(const DepthwiseArgs &, con } // namespace depthwise } // namespace arm_conv + +#endif // ACL_SRC_CORE_NEON_KERNELS_ASSEMBLY_DEPTHWISE_HPP diff --git a/src/core/NEON/kernels/assembly/pool_common.hpp b/src/core/NEON/kernels/assembly/pool_common.hpp index 045f9f95d3..cf133c38b2 100644 --- a/src/core/NEON/kernels/assembly/pool_common.hpp +++ b/src/core/NEON/kernels/assembly/pool_common.hpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2021-2023 Arm Limited. + * Copyright (c) 2021-2024 Arm Limited. * * SPDX-License-Identifier: MIT * @@ -22,6 +22,9 @@ * SOFTWARE. */ +#ifndef ACL_SRC_CORE_NEON_KERNELS_ASSEMBLY_POOL_COMMON_HPP +#define ACL_SRC_CORE_NEON_KERNELS_ASSEMBLY_POOL_COMMON_HPP + #pragma once #ifdef CYCLE_PROFILING #include "profiler.hpp" @@ -108,3 +111,5 @@ class IPoolingCommon } // namespace pooling } // namespace arm_conv + +#endif // ACL_SRC_CORE_NEON_KERNELS_ASSEMBLY_POOL_COMMON_HPP diff --git a/src/core/NEON/kernels/assembly/pooling.hpp b/src/core/NEON/kernels/assembly/pooling.hpp index 89d594298e..2a11571742 100644 --- a/src/core/NEON/kernels/assembly/pooling.hpp +++ b/src/core/NEON/kernels/assembly/pooling.hpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2021-2023 Arm Limited. + * Copyright (c) 2021-2024 Arm Limited. * * SPDX-License-Identifier: MIT * @@ -22,6 +22,9 @@ * SOFTWARE. 
*/ +#ifndef ACL_SRC_CORE_NEON_KERNELS_ASSEMBLY_POOLING_HPP +#define ACL_SRC_CORE_NEON_KERNELS_ASSEMBLY_POOLING_HPP + #pragma once #include "arm_gemm_local.hpp" @@ -223,3 +226,5 @@ UniquePoolingCommon pooling(const PoolingArgs &, const OutputSt } // namespace pooling } // namespace arm_conv + +#endif // ACL_SRC_CORE_NEON_KERNELS_ASSEMBLY_POOLING_HPP diff --git a/src/core/NEON/kernels/convolution/common/shims.hpp b/src/core/NEON/kernels/convolution/common/shims.hpp index 310bd47b82..9f8dc7a2db 100644 --- a/src/core/NEON/kernels/convolution/common/shims.hpp +++ b/src/core/NEON/kernels/convolution/common/shims.hpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2017 Arm Limited. + * Copyright (c) 2017, 2024 Arm Limited. * * SPDX-License-Identifier: MIT * @@ -308,6 +308,452 @@ inline void nchw_to_nhwc( ); } +/*****************************************************************************/ +/* 16-bit implementation : NCHW -> NHWC + */ +template <> +inline void nchw_to_nhwc(const int16_t *const in, + int16_t *const out, + const int n_batches, + const int n_channels, + const int n_rows, + const int n_cols, + int in_batch_stride, + int in_channel_stride, + int in_row_stride, + int out_batch_stride, + int out_row_stride, + int out_col_stride) +{ + typedef int16_t T; + + // Fill in the stride values + in_row_stride = (in_row_stride) ? in_row_stride : n_cols; + in_channel_stride = (in_channel_stride) ? in_channel_stride : n_rows * in_row_stride; + in_batch_stride = (in_batch_stride) ? in_batch_stride : n_channels * in_channel_stride; + + out_col_stride = (out_col_stride) ? out_col_stride : n_channels; + out_row_stride = (out_row_stride) ? out_row_stride : n_cols * out_col_stride; + out_batch_stride = (out_batch_stride) ? 
out_batch_stride : n_rows * out_row_stride; + + // Perform the re-ordering + for (int n = 0; n < n_batches; n++) + { + const T *const in_batch = in + n * in_batch_stride; + T *const out_batch = out + n * out_batch_stride; + + for (int i = 0; i < n_rows; i++) + { + const T *const in_row = in_batch + i * in_row_stride; + T *const out_row = out_batch + i * out_row_stride; + + int j = 0, j_remaining = n_cols; +#ifdef __arm_any__ + for (; j_remaining >= 8; j += 8, j_remaining -= 8) + { + int c = 0, c_remaining = n_channels; + for (; c_remaining >= 8; c += 8, c_remaining -= 8) + { + // Read 8 channels worth of 8 columns, then zip to produce 8 columns + // worth of 8 channels. + int16x8_t channel_pixels[8]; + channel_pixels[0] = vld1q_s16(in_row + (c + 0) * in_channel_stride + j); + channel_pixels[1] = vld1q_s16(in_row + (c + 1) * in_channel_stride + j); + channel_pixels[2] = vld1q_s16(in_row + (c + 2) * in_channel_stride + j); + channel_pixels[3] = vld1q_s16(in_row + (c + 3) * in_channel_stride + j); + channel_pixels[4] = vld1q_s16(in_row + (c + 4) * in_channel_stride + j); + channel_pixels[5] = vld1q_s16(in_row + (c + 5) * in_channel_stride + j); + channel_pixels[6] = vld1q_s16(in_row + (c + 6) * in_channel_stride + j); + channel_pixels[7] = vld1q_s16(in_row + (c + 7) * in_channel_stride + j); + + // 0th and 4th, 1st and 5th, 2nd and 6th, 3rd and 7th channels + const int16x8x2_t zip1 = vzipq_s16(channel_pixels[0], channel_pixels[4]); + const int16x8x2_t zip2 = vzipq_s16(channel_pixels[1], channel_pixels[5]); + const int16x8x2_t zip3 = vzipq_s16(channel_pixels[2], channel_pixels[6]); + const int16x8x2_t zip4 = vzipq_s16(channel_pixels[3], channel_pixels[7]); + + // 0th, 2nd, 4th, 6th channels + const int16x8x2_t zip5 = vzipq_s16(zip1.val[0], zip3.val[0]); + const int16x8x2_t zip6 = vzipq_s16(zip1.val[1], zip3.val[1]); + + // 1st, 3rd, 5th, 7th channels + const int16x8x2_t zip7 = vzipq_s16(zip2.val[0], zip4.val[0]); + const int16x8x2_t zip8 = vzipq_s16(zip2.val[1], 
zip4.val[1]); + + // 0th, 1st, 2nd, ..., 7th channels + const int16x8x2_t out_0 = vzipq_s16(zip5.val[0], zip7.val[0]); + const int16x8x2_t out_1 = vzipq_s16(zip5.val[1], zip7.val[1]); + const int16x8x2_t out_2 = vzipq_s16(zip6.val[0], zip8.val[0]); + const int16x8x2_t out_3 = vzipq_s16(zip6.val[1], zip8.val[1]); + + vst1q_s16(out_row + (j + 0) * out_col_stride + c, out_0.val[0]); + vst1q_s16(out_row + (j + 1) * out_col_stride + c, out_0.val[1]); + vst1q_s16(out_row + (j + 2) * out_col_stride + c, out_1.val[0]); + vst1q_s16(out_row + (j + 3) * out_col_stride + c, out_1.val[1]); + vst1q_s16(out_row + (j + 4) * out_col_stride + c, out_2.val[0]); + vst1q_s16(out_row + (j + 5) * out_col_stride + c, out_2.val[1]); + vst1q_s16(out_row + (j + 6) * out_col_stride + c, out_3.val[0]); + vst1q_s16(out_row + (j + 7) * out_col_stride + c, out_3.val[1]); + } + for (; c_remaining; c++, c_remaining--) + { + for (int _j = 0; _j < 8; _j++) + { + const T *const in_col = in_row + j + _j; + T *const out_col = out_row + (j + _j) * out_col_stride; + const T *const in_channel = in_col + c * in_channel_stride; + out_col[c] = *(in_channel); + } + } + } + for (; j_remaining >= 4; j += 4, j_remaining -= 4) + { + int c = 0, c_remaining = n_channels; + for (; c_remaining >= 4; c += 4, c_remaining -= 4) + { + // Read 4 channels worth of 4 columns, then zip to produce 4 columns + // worth of 4 channels. 
+ int16x4_t channel_pixels[4]; + channel_pixels[0] = vld1_s16(in_row + (c + 0) * in_channel_stride + j); + channel_pixels[1] = vld1_s16(in_row + (c + 1) * in_channel_stride + j); + channel_pixels[2] = vld1_s16(in_row + (c + 2) * in_channel_stride + j); + channel_pixels[3] = vld1_s16(in_row + (c + 3) * in_channel_stride + j); + + const int16x4x2_t zip1 = vzip_s16(channel_pixels[0], channel_pixels[2]); + const int16x4x2_t zip2 = vzip_s16(channel_pixels[1], channel_pixels[3]); + const int16x4x2_t out_0 = vzip_s16(zip1.val[0], zip2.val[0]); + const int16x4x2_t out_1 = vzip_s16(zip1.val[1], zip2.val[1]); + + vst1_s16(out_row + (j + 0) * out_col_stride + c, out_0.val[0]); + vst1_s16(out_row + (j + 1) * out_col_stride + c, out_0.val[1]); + vst1_s16(out_row + (j + 2) * out_col_stride + c, out_1.val[0]); + vst1_s16(out_row + (j + 3) * out_col_stride + c, out_1.val[1]); + } + for (; c_remaining; c++, c_remaining--) + { + for (int _j = 0; _j < 4; _j++) + { + const T *const in_col = in_row + j + _j; + T *const out_col = out_row + (j + _j) * out_col_stride; + const T *const in_channel = in_col + c * in_channel_stride; + out_col[c] = *(in_channel); + } + } + } +#endif // __arm_any__ + for (; j_remaining; j++, j_remaining--) + { + const T *const in_col = in_row + j; + T *const out_col = out_row + j * out_col_stride; + + for (int c = 0; c < n_channels; c++) + { + const T *const in_channel = in_col + c * in_channel_stride; + out_col[c] = *(in_channel); + } + } + } + } +} + +template <> +inline void nchw_to_nhwc( + const uint16_t* const in, + uint16_t* const out, + const int n_batches, + const int n_channels, + const int n_rows, + const int n_cols, + int in_batch_stride, + int in_channel_stride, + int in_row_stride, + int out_batch_stride, + int out_row_stride, + int out_col_stride +) +{ + nchw_to_nhwc( + reinterpret_cast(in), + reinterpret_cast(out), + n_batches, n_channels, n_rows, n_cols, + in_batch_stride, in_channel_stride, in_row_stride, + out_batch_stride, out_row_stride, 
out_col_stride + ); +} + +#ifdef ARM_COMPUTE_ENABLE_FP16 +template <> +inline void nchw_to_nhwc( + const float16_t* const in, + float16_t* const out, + const int n_batches, + const int n_channels, + const int n_rows, + const int n_cols, + int in_batch_stride, + int in_channel_stride, + int in_row_stride, + int out_batch_stride, + int out_row_stride, + int out_col_stride +) +{ + nchw_to_nhwc( + reinterpret_cast(in), + reinterpret_cast(out), + n_batches, n_channels, n_rows, n_cols, + in_batch_stride, in_channel_stride, in_row_stride, + out_batch_stride, out_row_stride, out_col_stride + ); +} +#endif /* ARM_COMPUTE_ENABLE_FP16 */ + +/*****************************************************************************/ +/* 8-bit implementation : NCHW -> NHWC + */ +template <> +inline void nchw_to_nhwc(const int8_t *const in, + int8_t *const out, + const int n_batches, + const int n_channels, + const int n_rows, + const int n_cols, + int in_batch_stride, + int in_channel_stride, + int in_row_stride, + int out_batch_stride, + int out_row_stride, + int out_col_stride) +{ + typedef int8_t T; + + // Fill in the stride values + in_row_stride = (in_row_stride) ? in_row_stride : n_cols; + in_channel_stride = (in_channel_stride) ? in_channel_stride : n_rows * in_row_stride; + in_batch_stride = (in_batch_stride) ? in_batch_stride : n_channels * in_channel_stride; + + out_col_stride = (out_col_stride) ? out_col_stride : n_channels; + out_row_stride = (out_row_stride) ? out_row_stride : n_cols * out_col_stride; + out_batch_stride = (out_batch_stride) ? 
out_batch_stride : n_rows * out_row_stride; + + // Perform the re-ordering + for (int n = 0; n < n_batches; n++) + { + const T *const in_batch = in + n * in_batch_stride; + T *const out_batch = out + n * out_batch_stride; + + for (int i = 0; i < n_rows; i++) + { + const T *const in_row = in_batch + i * in_row_stride; + T *const out_row = out_batch + i * out_row_stride; + + int j = 0, j_remaining = n_cols; +#ifdef __arm_any__ + for (; j_remaining >= 16; j += 16, j_remaining -= 16) + { + + int c = 0, c_remaining = n_channels; + for (; c_remaining >= 16; c += 16, c_remaining -= 16) + { + // Read 16 channels worth of 16 columns, then zip to produce 16 columns + // worth of 16 channels. + int8x16_t channel_pixels[16]; + + channel_pixels[0] = vld1q_s8(in_row + (c + 0) * in_channel_stride + j); + channel_pixels[1] = vld1q_s8(in_row + (c + 1) * in_channel_stride + j); + channel_pixels[2] = vld1q_s8(in_row + (c + 2) * in_channel_stride + j); + channel_pixels[3] = vld1q_s8(in_row + (c + 3) * in_channel_stride + j); + + channel_pixels[4] = vld1q_s8(in_row + (c + 4) * in_channel_stride + j); + channel_pixels[5] = vld1q_s8(in_row + (c + 5) * in_channel_stride + j); + channel_pixels[6] = vld1q_s8(in_row + (c + 6) * in_channel_stride + j); + channel_pixels[7] = vld1q_s8(in_row + (c + 7) * in_channel_stride + j); + + channel_pixels[8] = vld1q_s8(in_row + (c + 8) * in_channel_stride + j); + channel_pixels[9] = vld1q_s8(in_row + (c + 9) * in_channel_stride + j); + channel_pixels[10] = vld1q_s8(in_row + (c + 10) * in_channel_stride + j); + channel_pixels[11] = vld1q_s8(in_row + (c + 11) * in_channel_stride + j); + + channel_pixels[12] = vld1q_s8(in_row + (c + 12) * in_channel_stride + j); + channel_pixels[13] = vld1q_s8(in_row + (c + 13) * in_channel_stride + j); + channel_pixels[14] = vld1q_s8(in_row + (c + 14) * in_channel_stride + j); + channel_pixels[15] = vld1q_s8(in_row + (c + 15) * in_channel_stride + j); + + // 0th and 8th, 1st and 9th, 2nd and 10th, 3rd and 11th channels + 
const int8x16x2_t zip1 = vzipq_s8(channel_pixels[0], channel_pixels[8]); + const int8x16x2_t zip2 = vzipq_s8(channel_pixels[1], channel_pixels[9]); + const int8x16x2_t zip3 = vzipq_s8(channel_pixels[2], channel_pixels[10]); + const int8x16x2_t zip4 = vzipq_s8(channel_pixels[3], channel_pixels[11]); + + // 4th and 12th, 5th and 13th, 6th and 14th, 7th and 15th channels + const int8x16x2_t zip5 = vzipq_s8(channel_pixels[4], channel_pixels[12]); + const int8x16x2_t zip6 = vzipq_s8(channel_pixels[5], channel_pixels[13]); + const int8x16x2_t zip7 = vzipq_s8(channel_pixels[6], channel_pixels[14]); + const int8x16x2_t zip8 = vzipq_s8(channel_pixels[7], channel_pixels[15]); + + // 0th, 4th, 8th, 12th channels + const int8x16x2_t zip9 = vzipq_s8(zip1.val[0], zip5.val[0]); + const int8x16x2_t zip10 = vzipq_s8(zip1.val[1], zip5.val[1]); + + // 2nd, 6th, 10th, 14th channels + const int8x16x2_t zip11 = vzipq_s8(zip3.val[0], zip7.val[0]); + const int8x16x2_t zip12 = vzipq_s8(zip3.val[1], zip7.val[1]); + + // 0th, 2nd, 4th, 6th, 8th, 10th, 12th, 14th channels + const int8x16x2_t zip13 = vzipq_s8(zip9.val[0], zip11.val[0]); + const int8x16x2_t zip14 = vzipq_s8(zip9.val[1], zip11.val[1]); + const int8x16x2_t zip15 = vzipq_s8(zip10.val[0], zip12.val[0]); + const int8x16x2_t zip16 = vzipq_s8(zip10.val[1], zip12.val[1]); + + // 1st, 5th, 9th, 13th channels + const int8x16x2_t zip17 = vzipq_s8(zip2.val[0], zip6.val[0]); + const int8x16x2_t zip18 = vzipq_s8(zip2.val[1], zip6.val[1]); + + // 3rd, 7th, 11th, 15th channels + const int8x16x2_t zip19 = vzipq_s8(zip4.val[0], zip8.val[0]); + const int8x16x2_t zip20 = vzipq_s8(zip4.val[1], zip8.val[1]); + + // 1st, 3rd, 5th, 7th, 9th, 11th, 13th, 15th channels + const int8x16x2_t zip21 = vzipq_s8(zip17.val[0], zip19.val[0]); + const int8x16x2_t zip22 = vzipq_s8(zip17.val[1], zip19.val[1]); + const int8x16x2_t zip23 = vzipq_s8(zip18.val[0], zip20.val[0]); + const int8x16x2_t zip24 = vzipq_s8(zip18.val[1], zip20.val[1]); + + // 0th, 1st, 2nd, 
..., 15th channels + const int8x16x2_t out_0 = vzipq_s8(zip13.val[0], zip21.val[0]); + const int8x16x2_t out_1 = vzipq_s8(zip13.val[1], zip21.val[1]); + const int8x16x2_t out_2 = vzipq_s8(zip14.val[0], zip22.val[0]); + const int8x16x2_t out_3 = vzipq_s8(zip14.val[1], zip22.val[1]); + const int8x16x2_t out_4 = vzipq_s8(zip15.val[0], zip23.val[0]); + const int8x16x2_t out_5 = vzipq_s8(zip15.val[1], zip23.val[1]); + const int8x16x2_t out_6 = vzipq_s8(zip16.val[0], zip24.val[0]); + const int8x16x2_t out_7 = vzipq_s8(zip16.val[1], zip24.val[1]); + + vst1q_s8(out_row + (j + 0) * out_col_stride + c, out_0.val[0]); + vst1q_s8(out_row + (j + 1) * out_col_stride + c, out_0.val[1]); + vst1q_s8(out_row + (j + 2) * out_col_stride + c, out_1.val[0]); + vst1q_s8(out_row + (j + 3) * out_col_stride + c, out_1.val[1]); + + vst1q_s8(out_row + (j + 4) * out_col_stride + c, out_2.val[0]); + vst1q_s8(out_row + (j + 5) * out_col_stride + c, out_2.val[1]); + vst1q_s8(out_row + (j + 6) * out_col_stride + c, out_3.val[0]); + vst1q_s8(out_row + (j + 7) * out_col_stride + c, out_3.val[1]); + + vst1q_s8(out_row + (j + 8) * out_col_stride + c, out_4.val[0]); + vst1q_s8(out_row + (j + 9) * out_col_stride + c, out_4.val[1]); + vst1q_s8(out_row + (j + 10) * out_col_stride + c, out_5.val[0]); + vst1q_s8(out_row + (j + 11) * out_col_stride + c, out_5.val[1]); + + vst1q_s8(out_row + (j + 12) * out_col_stride + c, out_6.val[0]); + vst1q_s8(out_row + (j + 13) * out_col_stride + c, out_6.val[1]); + vst1q_s8(out_row + (j + 14) * out_col_stride + c, out_7.val[0]); + vst1q_s8(out_row + (j + 15) * out_col_stride + c, out_7.val[1]); + } + for (; c_remaining; c++, c_remaining--) + { + for (int _j = 0; _j < 16; _j++) + { + const T *const in_col = in_row + j + _j; + T *const out_col = out_row + (j + _j) * out_col_stride; + const T *const in_channel = in_col + c * in_channel_stride; + out_col[c] = *(in_channel); + } + } + } + for (; j_remaining >= 8; j += 8, j_remaining -= 8) + { + int c = 0, c_remaining = 
n_channels; + for (; c_remaining >= 8; c += 8, c_remaining -= 8) + { + // Read 8 channels worth of 8 columns, then zip to produce 8 columns + // worth of 8 channels. + int8x8_t channel_pixels[8]; + + channel_pixels[0] = vld1_s8(in_row + (c + 0) * in_channel_stride + j); + channel_pixels[1] = vld1_s8(in_row + (c + 1) * in_channel_stride + j); + channel_pixels[2] = vld1_s8(in_row + (c + 2) * in_channel_stride + j); + channel_pixels[3] = vld1_s8(in_row + (c + 3) * in_channel_stride + j); + + channel_pixels[4] = vld1_s8(in_row + (c + 4) * in_channel_stride + j); + channel_pixels[5] = vld1_s8(in_row + (c + 5) * in_channel_stride + j); + channel_pixels[6] = vld1_s8(in_row + (c + 6) * in_channel_stride + j); + channel_pixels[7] = vld1_s8(in_row + (c + 7) * in_channel_stride + j); + + const int8x8x2_t zip1 = vzip_s8(channel_pixels[0], channel_pixels[4]); + const int8x8x2_t zip2 = vzip_s8(channel_pixels[1], channel_pixels[5]); + const int8x8x2_t zip3 = vzip_s8(channel_pixels[2], channel_pixels[6]); + const int8x8x2_t zip4 = vzip_s8(channel_pixels[3], channel_pixels[7]); + + // 0th, 2nd, 4th, 6th channels + const int8x8x2_t zip5 = vzip_s8(zip1.val[0], zip3.val[0]); + const int8x8x2_t zip6 = vzip_s8(zip1.val[1], zip3.val[1]); + + // 1st, 3rd, 5th, 7th channels + const int8x8x2_t zip7 = vzip_s8(zip2.val[0], zip4.val[0]); + const int8x8x2_t zip8 = vzip_s8(zip2.val[1], zip4.val[1]); + + // 0th, 1st, 2nd, ..., 7th channels + const int8x8x2_t out_0 = vzip_s8(zip5.val[0], zip7.val[0]); + const int8x8x2_t out_1 = vzip_s8(zip5.val[1], zip7.val[1]); + const int8x8x2_t out_2 = vzip_s8(zip6.val[0], zip8.val[0]); + const int8x8x2_t out_3 = vzip_s8(zip6.val[1], zip8.val[1]); + + vst1_s8(out_row + (j + 0) * out_col_stride + c, out_0.val[0]); + vst1_s8(out_row + (j + 1) * out_col_stride + c, out_0.val[1]); + vst1_s8(out_row + (j + 2) * out_col_stride + c, out_1.val[0]); + vst1_s8(out_row + (j + 3) * out_col_stride + c, out_1.val[1]); + + vst1_s8(out_row + (j + 4) * out_col_stride + c, 
out_2.val[0]); + vst1_s8(out_row + (j + 5) * out_col_stride + c, out_2.val[1]); + vst1_s8(out_row + (j + 6) * out_col_stride + c, out_3.val[0]); + vst1_s8(out_row + (j + 7) * out_col_stride + c, out_3.val[1]); + } + for (; c_remaining; c++, c_remaining--) + { + for (int _j = 0; _j < 8; _j++) + { + const T *const in_col = in_row + j + _j; + T *const out_col = out_row + (j + _j) * out_col_stride; + const T *const in_channel = in_col + c * in_channel_stride; + out_col[c] = *(in_channel); + } + } + } +#endif // __arm_any__ + for (; j_remaining; j++, j_remaining--) + { + const T *const in_col = in_row + j; + T *const out_col = out_row + j * out_col_stride; + + for (int c = 0; c < n_channels; c++) + { + const T *const in_channel = in_col + c * in_channel_stride; + out_col[c] = *(in_channel); + } + } + } + } +} + +template <> +inline void nchw_to_nhwc(const uint8_t *const in, + uint8_t *const out, + const int n_batches, + const int n_channels, + const int n_rows, + const int n_cols, + int in_batch_stride, + int in_channel_stride, + int in_row_stride, + int out_batch_stride, + int out_row_stride, + int out_col_stride) +{ + nchw_to_nhwc(reinterpret_cast(in), reinterpret_cast(out), n_batches, n_channels, n_rows, + n_cols, in_batch_stride, in_channel_stride, in_row_stride, out_batch_stride, out_row_stride, + out_col_stride); +} + /*****************************************************************************/ /* Generic implementation : NCHW -> NHWC */ @@ -550,6 +996,502 @@ inline void nhwc_to_nchw( ); } +/*****************************************************************************/ +/* 16-bit implementation : NHWC -> NCHW + */ +template <> +inline void nhwc_to_nchw( + const int16_t* const in, // Input data in NHWC form + int16_t* const out, // Output data in NCHW form + const int n_batches, + const int n_rows, + const int n_cols, + const int n_channels, + int in_batch_stride, + int in_row_stride, + int in_col_stride, + int out_batch_stride, + int out_channel_stride, + int 
out_row_stride +) +{ + typedef int16_t T; + + // Fill in stride values + in_col_stride = (in_col_stride) ? in_col_stride : n_channels; + in_row_stride = (in_row_stride) ? in_row_stride : n_cols * in_col_stride; + in_batch_stride = (in_batch_stride) ? in_batch_stride + : n_rows * in_row_stride; + + out_row_stride = (out_row_stride) ? out_row_stride : n_cols; + out_channel_stride = (out_channel_stride) ? out_channel_stride + : n_rows * out_row_stride; + out_batch_stride = (out_batch_stride) ? out_batch_stride + : n_channels * out_channel_stride; + + // Perform the re-ordering + // For every batch + for (int n = 0; n < n_batches; n++) + { + const T* const in_batch = in + n*in_batch_stride; + T* const out_batch = out + n*out_batch_stride; + + // For every row + for (int i = 0; i < n_rows; i++) + { + const T* const in_i = in_batch + i*in_row_stride; + T* const out_i = out_batch + i*out_row_stride; + + // For every column, beginning with chunks of 8 + int j = 0, j_remaining = n_cols; +#ifdef __arm_any__ + for (; j_remaining >= 8; j += 8, j_remaining -=8) + { + // For every channel, beginning with chunks of 8 + int c = 0, c_remaining = n_channels; + for (; c_remaining >= 8; c += 8, c_remaining -= 8) + { + // Read 8 columns worth of 8 channels then zip to produce 8 channels + // worth of 8 columns. 
+ int16x8_t pixel_channels[8]; + + pixel_channels[0] = vld1q_s16(in_i + (j + 0)*in_col_stride + c); + pixel_channels[1] = vld1q_s16(in_i + (j + 1)*in_col_stride + c); + pixel_channels[2] = vld1q_s16(in_i + (j + 2)*in_col_stride + c); + pixel_channels[3] = vld1q_s16(in_i + (j + 3)*in_col_stride + c); + pixel_channels[4] = vld1q_s16(in_i + (j + 4)*in_col_stride + c); + pixel_channels[5] = vld1q_s16(in_i + (j + 5)*in_col_stride + c); + pixel_channels[6] = vld1q_s16(in_i + (j + 6)*in_col_stride + c); + pixel_channels[7] = vld1q_s16(in_i + (j + 7)*in_col_stride + c); + + // 0th and 4th, 1st and 5th, 2nd and 6th, 3rd and 7th columns + const int16x8x2_t zip1 = vzipq_s16(pixel_channels[0], pixel_channels[4]); + const int16x8x2_t zip2 = vzipq_s16(pixel_channels[1], pixel_channels[5]); + const int16x8x2_t zip3 = vzipq_s16(pixel_channels[2], pixel_channels[6]); + const int16x8x2_t zip4 = vzipq_s16(pixel_channels[3], pixel_channels[7]); + + // 0th, 2nd, 4th, 6th columns + const int16x8x2_t zip5 = vzipq_s16(zip1.val[0], zip3.val[0]); + const int16x8x2_t zip6 = vzipq_s16(zip1.val[1], zip3.val[1]); + + // 1st, 3rd, 5th, 7th columns + const int16x8x2_t zip7 = vzipq_s16(zip2.val[0], zip4.val[0]); + const int16x8x2_t zip8 = vzipq_s16(zip2.val[1], zip4.val[1]); + + // 0th, 1st, ..., 7th columns + const int16x8x2_t out_0 = vzipq_s16(zip5.val[0], zip7.val[0]); + const int16x8x2_t out_1 = vzipq_s16(zip5.val[1], zip7.val[1]); + const int16x8x2_t out_2 = vzipq_s16(zip6.val[0], zip8.val[0]); + const int16x8x2_t out_3 = vzipq_s16(zip6.val[1], zip8.val[1]); + + // 0th, 1st, 2nd, 3rd columns + vst1q_s16(out_i + j + (c + 0)*out_channel_stride, out_0.val[0]); + vst1q_s16(out_i + j + (c + 1)*out_channel_stride, out_0.val[1]); + vst1q_s16(out_i + j + (c + 2)*out_channel_stride, out_1.val[0]); + vst1q_s16(out_i + j + (c + 3)*out_channel_stride, out_1.val[1]); + + // 4th, 5th, 6th, 7th columns + vst1q_s16(out_i + j + (c + 4)*out_channel_stride, out_2.val[0]); + vst1q_s16(out_i + j + (c + 
5)*out_channel_stride, out_2.val[1]); + vst1q_s16(out_i + j + (c + 6)*out_channel_stride, out_3.val[0]); + vst1q_s16(out_i + j + (c + 7)*out_channel_stride, out_3.val[1]); + } + for (; c_remaining; c++, c_remaining--) + { + for (int _j = 0; _j < 8; _j++) + { + const T* const in_j = in_i + (j + _j)*in_col_stride; + T* const out_j = out_i + (j + _j); + + const T* const in_channel = in_j + c; + T* const out_channel = out_j + c*out_channel_stride; + *(out_channel) = *(in_channel); + } + } + } + for (; j_remaining >= 4; j += 4, j_remaining -=4) + { + int c = 0, c_remaining = n_channels; + for (; c_remaining >= 4; c += 4, c_remaining -= 4) + { + // Read 4 columns worth of 4 channels then zip to produce 4 channels + // worth of 4 columns. + int16x4_t pixel_channels[4]; + + pixel_channels[0] = vld1_s16(in_i + (j + 0)*in_col_stride + c); + pixel_channels[1] = vld1_s16(in_i + (j + 1)*in_col_stride + c); + pixel_channels[2] = vld1_s16(in_i + (j + 2)*in_col_stride + c); + pixel_channels[3] = vld1_s16(in_i + (j + 3)*in_col_stride + c); + + // 0th and 2nd, 1st and 3rd columns + const int16x4x2_t zip1 = vzip_s16(pixel_channels[0], pixel_channels[2]); + const int16x4x2_t zip2 = vzip_s16(pixel_channels[1], pixel_channels[3]); + + // 0th, 1st, 2nd, 3rd columns + const int16x4x2_t out_0 = vzip_s16(zip1.val[0], zip2.val[0]); + const int16x4x2_t out_1 = vzip_s16(zip1.val[1], zip2.val[1]); + + // 0th, 1st, 2nd, 3rd columns + vst1_s16(out_i + j + (c + 0)*out_channel_stride, out_0.val[0]); + vst1_s16(out_i + j + (c + 1)*out_channel_stride, out_0.val[1]); + vst1_s16(out_i + j + (c + 2)*out_channel_stride, out_1.val[0]); + vst1_s16(out_i + j + (c + 3)*out_channel_stride, out_1.val[1]); + } + for (; c_remaining; c++, c_remaining--) + { + for (int _j = 0; _j < 4; _j++) + { + const T* const in_j = in_i + (j + _j)*in_col_stride; + T* const out_j = out_i + (j + _j); + + const T* const in_channel = in_j + c; + T* const out_channel = out_j + c*out_channel_stride; + *(out_channel) = *(in_channel); 
+ } + } + } +#endif // __arm_any__ + for (; j_remaining; j++, j_remaining--) + { + const T* const in_j = in_i + j*in_col_stride; + T* const out_j = out_i + j; + + // For every channel + for (int c = 0; c < n_channels; c++) + { + const T* const in_channel = in_j + c; + T* const out_channel = out_j + c*out_channel_stride; + *(out_channel) = *(in_channel); + } + } + } + } +} + +template <> +inline void nhwc_to_nchw( + const uint16_t* const in, // Input data in NHWC form + uint16_t* const out, // Output data in NCHW form + const int n_batches, + const int n_rows, + const int n_cols, + const int n_channels, + int in_batch_stride, + int in_row_stride, + int in_col_stride, + int out_batch_stride, + int out_channel_stride, + int out_row_stride +) +{ + nhwc_to_nchw( + reinterpret_cast(in), + reinterpret_cast(out), + n_batches, n_rows, n_cols, n_channels, + in_batch_stride, in_row_stride, in_col_stride, + out_batch_stride, out_channel_stride, out_row_stride + ); +} + +#ifdef ARM_COMPUTE_ENABLE_FP16 +template <> +inline void nhwc_to_nchw( + const float16_t* const in, // Input data in NHWC form + float16_t* const out, // Output data in NCHW form + const int n_batches, + const int n_rows, + const int n_cols, + const int n_channels, + int in_batch_stride, + int in_row_stride, + int in_col_stride, + int out_batch_stride, + int out_channel_stride, + int out_row_stride +) +{ + nhwc_to_nchw( + reinterpret_cast(in), + reinterpret_cast(out), + n_batches, n_rows, n_cols, n_channels, + in_batch_stride, in_row_stride, in_col_stride, + out_batch_stride, out_channel_stride, out_row_stride + ); +} +#endif /* ARM_COMPUTE_ENABLE_FP16 */ + +template <> +inline void nhwc_to_nchw( + const int8_t* const in, // Input data in NHWC form + int8_t* const out, // Output data in NCHW form + const int n_batches, + const int n_rows, + const int n_cols, + const int n_channels, + int in_batch_stride, + int in_row_stride, + int in_col_stride, + int out_batch_stride, + int out_channel_stride, + int 
out_row_stride +) +{ + typedef int8_t T; + + // Fill in stride values + in_col_stride = (in_col_stride) ? in_col_stride : n_channels; + in_row_stride = (in_row_stride) ? in_row_stride : n_cols * in_col_stride; + in_batch_stride = (in_batch_stride) ? in_batch_stride + : n_rows * in_row_stride; + + out_row_stride = (out_row_stride) ? out_row_stride : n_cols; + out_channel_stride = (out_channel_stride) ? out_channel_stride + : n_rows * out_row_stride; + out_batch_stride = (out_batch_stride) ? out_batch_stride + : n_channels * out_channel_stride; + + // Perform the re-ordering + // For every batch + for (int n = 0; n < n_batches; n++) + { + const T* const in_batch = in + n*in_batch_stride; + T* const out_batch = out + n*out_batch_stride; + + // For every row + for (int i = 0; i < n_rows; i++) + { + const T* const in_i = in_batch + i*in_row_stride; + T* const out_i = out_batch + i*out_row_stride; + + // For every column, beginning with chunks of 16 + int j = 0, j_remaining = n_cols; +#ifdef __arm_any__ + for (; j_remaining >= 16; j += 16, j_remaining -=16) + { + // For every channel, beginning with chunks of 16 + int c = 0, c_remaining = n_channels; + for (; c_remaining >= 16; c += 16, c_remaining -= 16) + { + // Read 16 columns worth of 16 channels then zip to produce 16 channels + // worth of 16 columns. 
+ int8x16_t pixel_channels[16]; + + pixel_channels[0] = vld1q_s8(in_i + (j + 0)*in_col_stride + c); + pixel_channels[1] = vld1q_s8(in_i + (j + 1)*in_col_stride + c); + pixel_channels[2] = vld1q_s8(in_i + (j + 2)*in_col_stride + c); + pixel_channels[3] = vld1q_s8(in_i + (j + 3)*in_col_stride + c); + + pixel_channels[4] = vld1q_s8(in_i + (j + 4)*in_col_stride + c); + pixel_channels[5] = vld1q_s8(in_i + (j + 5)*in_col_stride + c); + pixel_channels[6] = vld1q_s8(in_i + (j + 6)*in_col_stride + c); + pixel_channels[7] = vld1q_s8(in_i + (j + 7)*in_col_stride + c); + + pixel_channels[8] = vld1q_s8(in_i + (j + 8)*in_col_stride + c); + pixel_channels[9] = vld1q_s8(in_i + (j + 9)*in_col_stride + c); + pixel_channels[10] = vld1q_s8(in_i + (j + 10)*in_col_stride + c); + pixel_channels[11] = vld1q_s8(in_i + (j + 11)*in_col_stride + c); + + pixel_channels[12] = vld1q_s8(in_i + (j + 12)*in_col_stride + c); + pixel_channels[13] = vld1q_s8(in_i + (j + 13)*in_col_stride + c); + pixel_channels[14] = vld1q_s8(in_i + (j + 14)*in_col_stride + c); + pixel_channels[15] = vld1q_s8(in_i + (j + 15)*in_col_stride + c); + + // 0th and 8th, 1st and 9th, 2nd and 10th, 3rd and 11th columns + const int8x16x2_t zip1 = vzipq_s8(pixel_channels[0], pixel_channels[8]); + const int8x16x2_t zip2 = vzipq_s8(pixel_channels[1], pixel_channels[9]); + const int8x16x2_t zip3 = vzipq_s8(pixel_channels[2], pixel_channels[10]); + const int8x16x2_t zip4 = vzipq_s8(pixel_channels[3], pixel_channels[11]); + + // 4th and 12th, 5th and 13th, 6th and 14th, 7th and 15th columns + const int8x16x2_t zip5 = vzipq_s8(pixel_channels[4], pixel_channels[12]); + const int8x16x2_t zip6 = vzipq_s8(pixel_channels[5], pixel_channels[13]); + const int8x16x2_t zip7 = vzipq_s8(pixel_channels[6], pixel_channels[14]); + const int8x16x2_t zip8 = vzipq_s8(pixel_channels[7], pixel_channels[15]); + + // 0th, 4th, 8th, 12th columns + const int8x16x2_t zip9 = vzipq_s8(zip1.val[0], zip5.val[0]); + const int8x16x2_t zip10 = vzipq_s8(zip1.val[1], 
zip5.val[1]); + + // 2nd, 6th, 10th, 14th columns + const int8x16x2_t zip11 = vzipq_s8(zip3.val[0], zip7.val[0]); + const int8x16x2_t zip12 = vzipq_s8(zip3.val[1], zip7.val[1]); + + // 0th, 2nd, 4th, 6th, 8th, 10th, 12th, 14th columns + const int8x16x2_t zip13 = vzipq_s8(zip9.val[0], zip11.val[0]); + const int8x16x2_t zip14 = vzipq_s8(zip9.val[1], zip11.val[1]); + const int8x16x2_t zip15 = vzipq_s8(zip10.val[0], zip12.val[0]); + const int8x16x2_t zip16 = vzipq_s8(zip10.val[1], zip12.val[1]); + + // 1st, 5th, 9th, 13th columns + const int8x16x2_t zip17 = vzipq_s8(zip2.val[0], zip6.val[0]); + const int8x16x2_t zip18 = vzipq_s8(zip2.val[1], zip6.val[1]); + + // 3rd, 7th, 11th, 15th columns + const int8x16x2_t zip19 = vzipq_s8(zip4.val[0], zip8.val[0]); + const int8x16x2_t zip20 = vzipq_s8(zip4.val[1], zip8.val[1]); + + // 1st, 3rd, 5th, 7th, 9th, 11th, 13th, 15th columns + const int8x16x2_t zip21 = vzipq_s8(zip17.val[0], zip19.val[0]); + const int8x16x2_t zip22 = vzipq_s8(zip17.val[1], zip19.val[1]); + const int8x16x2_t zip23 = vzipq_s8(zip18.val[0], zip20.val[0]); + const int8x16x2_t zip24 = vzipq_s8(zip18.val[1], zip20.val[1]); + + // 0th, 1st, 2nd, 4th, ..., 15th columns + const int8x16x2_t out_0 = vzipq_s8(zip13.val[0], zip21.val[0]); + const int8x16x2_t out_1 = vzipq_s8(zip13.val[1], zip21.val[1]); + const int8x16x2_t out_2 = vzipq_s8(zip14.val[0], zip22.val[0]); + const int8x16x2_t out_3 = vzipq_s8(zip14.val[1], zip22.val[1]); + const int8x16x2_t out_4 = vzipq_s8(zip15.val[0], zip23.val[0]); + const int8x16x2_t out_5 = vzipq_s8(zip15.val[1], zip23.val[1]); + const int8x16x2_t out_6 = vzipq_s8(zip16.val[0], zip24.val[0]); + const int8x16x2_t out_7 = vzipq_s8(zip16.val[1], zip24.val[1]); + + // 0th, 1st, 2nd, 3rd columns + vst1q_s8(out_i + j + (c + 0)*out_channel_stride, out_0.val[0]); + vst1q_s8(out_i + j + (c + 1)*out_channel_stride, out_0.val[1]); + vst1q_s8(out_i + j + (c + 2)*out_channel_stride, out_1.val[0]); + vst1q_s8(out_i + j + (c + 
3)*out_channel_stride, out_1.val[1]); + + // 4th, 5th, 6th, 7th columns + vst1q_s8(out_i + j + (c + 4)*out_channel_stride, out_2.val[0]); + vst1q_s8(out_i + j + (c + 5)*out_channel_stride, out_2.val[1]); + vst1q_s8(out_i + j + (c + 6)*out_channel_stride, out_3.val[0]); + vst1q_s8(out_i + j + (c + 7)*out_channel_stride, out_3.val[1]); + + // 8th, 9th, 10th, 11th columns + vst1q_s8(out_i + j + (c + 8)*out_channel_stride, out_4.val[0]); + vst1q_s8(out_i + j + (c + 9)*out_channel_stride, out_4.val[1]); + vst1q_s8(out_i + j + (c + 10)*out_channel_stride, out_5.val[0]); + vst1q_s8(out_i + j + (c + 11)*out_channel_stride, out_5.val[1]); + + // 12th, 13th, 14th, 15th columns + vst1q_s8(out_i + j + (c + 12)*out_channel_stride, out_6.val[0]); + vst1q_s8(out_i + j + (c + 13)*out_channel_stride, out_6.val[1]); + vst1q_s8(out_i + j + (c + 14)*out_channel_stride, out_7.val[0]); + vst1q_s8(out_i + j + (c + 15)*out_channel_stride, out_7.val[1]); + + } + for (; c_remaining; c++, c_remaining--) + { + for (int _j = 0; _j < 16; _j++) + { + const T* const in_j = in_i + (j + _j)*in_col_stride; + T* const out_j = out_i + (j + _j); + + const T* const in_channel = in_j + c; + T* const out_channel = out_j + c*out_channel_stride; + *(out_channel) = *(in_channel); + } + } + } + for (; j_remaining >= 8; j += 8, j_remaining -= 8) + { + int c = 0, c_remaining = n_channels; + for (; c_remaining >= 8; c += 8, c_remaining -= 8) + { + // Read 8 columns worth of 8 channels then zip to produce 8 channels + // worth of 8 columns. 
+ int8x8_t pixel_channels[8]; + + pixel_channels[0] = vld1_s8(in_i + (j + 0)*in_col_stride + c); + pixel_channels[1] = vld1_s8(in_i + (j + 1)*in_col_stride + c); + pixel_channels[2] = vld1_s8(in_i + (j + 2)*in_col_stride + c); + pixel_channels[3] = vld1_s8(in_i + (j + 3)*in_col_stride + c); + + pixel_channels[4] = vld1_s8(in_i + (j + 4)*in_col_stride + c); + pixel_channels[5] = vld1_s8(in_i + (j + 5)*in_col_stride + c); + pixel_channels[6] = vld1_s8(in_i + (j + 6)*in_col_stride + c); + pixel_channels[7] = vld1_s8(in_i + (j + 7)*in_col_stride + c); + + // 0th and 4th, 1st and 5th, 2nd and 6th, 3rd and 7th columns + const int8x8x2_t zip1 = vzip_s8(pixel_channels[0], pixel_channels[4]); + const int8x8x2_t zip2 = vzip_s8(pixel_channels[1], pixel_channels[5]); + const int8x8x2_t zip3 = vzip_s8(pixel_channels[2], pixel_channels[6]); + const int8x8x2_t zip4 = vzip_s8(pixel_channels[3], pixel_channels[7]); + + // 0th, 2nd, 4th, 6th columns + const int8x8x2_t zip5 = vzip_s8(zip1.val[0], zip3.val[0]); + const int8x8x2_t zip6 = vzip_s8(zip1.val[1], zip3.val[1]); + + // 1st, 3rd, 5th, 7th columns + const int8x8x2_t zip7 = vzip_s8(zip2.val[0], zip4.val[0]); + const int8x8x2_t zip8 = vzip_s8(zip2.val[1], zip4.val[1]); + + // 0th, 1st, ..., 7th columns + const int8x8x2_t out_0 = vzip_s8(zip5.val[0], zip7.val[0]); + const int8x8x2_t out_1 = vzip_s8(zip5.val[1], zip7.val[1]); + const int8x8x2_t out_2 = vzip_s8(zip6.val[0], zip8.val[0]); + const int8x8x2_t out_3 = vzip_s8(zip6.val[1], zip8.val[1]); + + // 0th, 1st, 2nd, 3rd columns + vst1_s8(out_i + j + (c + 0)*out_channel_stride, out_0.val[0]); + vst1_s8(out_i + j + (c + 1)*out_channel_stride, out_0.val[1]); + vst1_s8(out_i + j + (c + 2)*out_channel_stride, out_1.val[0]); + vst1_s8(out_i + j + (c + 3)*out_channel_stride, out_1.val[1]); + + // 4th, 5th, 6th, 7th columns + vst1_s8(out_i + j + (c + 4)*out_channel_stride, out_2.val[0]); + vst1_s8(out_i + j + (c + 5)*out_channel_stride, out_2.val[1]); + vst1_s8(out_i + j + (c + 
6)*out_channel_stride, out_3.val[0]); + vst1_s8(out_i + j + (c + 7)*out_channel_stride, out_3.val[1]); + } + for (; c_remaining; c++, c_remaining--) + { + for (int _j = 0; _j < 8; _j++) + { + const T* const in_j = in_i + (j + _j)*in_col_stride; + T* const out_j = out_i + (j + _j); + + const T* const in_channel = in_j + c; + T* const out_channel = out_j + c*out_channel_stride; + *(out_channel) = *(in_channel); + } + } + } +#endif // __arm_any__ + for (; j_remaining; j++, j_remaining--) + { + const T* const in_j = in_i + j*in_col_stride; + T* const out_j = out_i + j; + + // For every channel + for (int c = 0; c < n_channels; c++) + { + const T* const in_channel = in_j + c; + T* const out_channel = out_j + c*out_channel_stride; + *(out_channel) = *(in_channel); + } + } + } + } + +} + +template <> +inline void nhwc_to_nchw( + const uint8_t* const in, // Input data in NHWC form + uint8_t* const out, // Output data in NCHW form + const int n_batches, + const int n_rows, + const int n_cols, + const int n_channels, + int in_batch_stride, + int in_row_stride, + int in_col_stride, + int out_batch_stride, + int out_channel_stride, + int out_row_stride +) +{ + nhwc_to_nchw( + reinterpret_cast(in), + reinterpret_cast(out), + n_batches, n_rows, n_cols, n_channels, + in_batch_stride, in_row_stride, in_col_stride, + out_batch_stride, out_channel_stride, out_row_stride + ); +} + /*****************************************************************************/ /* Generic implementation : NHWC -> NCHW */ diff --git a/src/cpu/kernels/CpuDirectConv3dKernel.cpp b/src/cpu/kernels/CpuDirectConv3dKernel.cpp index b5b2aed1ba..9c37ece3dd 100644 --- a/src/cpu/kernels/CpuDirectConv3dKernel.cpp +++ b/src/cpu/kernels/CpuDirectConv3dKernel.cpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2021-2022 Arm Limited. + * Copyright (c) 2021-2022, 2024 Arm Limited. 
* * SPDX-License-Identifier: MIT * @@ -25,8 +25,8 @@ #include "arm_compute/core/Error.h" #include "arm_compute/core/Helpers.h" -#include "arm_compute/core/IAccessWindow.h" #include "arm_compute/core/ITensor.h" +#include "arm_compute/core/Steps.h" #include "arm_compute/core/Types.h" #include "arm_compute/core/Utils.h" #include "arm_compute/core/utils/misc/ShapeCalculator.h" @@ -35,10 +35,8 @@ #include "src/core/common/Registrars.h" #include "src/core/CPP/Validate.h" #include "src/core/helpers/AutoConfiguration.h" -#include "src/core/NEON/wrapper/wrapper.h" -#include "src/cpu/kernels/conv3d/neon/list.h" - -#include +#include "src/core/helpers/WindowHelpers.h" +#include "src/cpu/kernels/conv3d/list.h" using namespace arm_compute::detail; @@ -51,18 +49,16 @@ namespace kernels namespace { static const std::vector available_kernels = { -#if defined(__ARM_FEATURE_FP16_VECTOR_ARITHMETIC) {"neon_fp16_directconv3d", [](const DataTypeISASelectorData &data) { return data.dt == DataType::F16 && data.isa.fp16; }, - REGISTER_FP16_NEON(arm_compute::cpu::directconv3d_float_neon_ndhwc)}, -#endif /* !defined(__ARM_FEATURE_FP16_VECTOR_ARITHMETIC) */ + REGISTER_FP16_NEON(directconv3d_fp16_neon_ndhwc)}, {"neon_fp32_directconv3d", [](const DataTypeISASelectorData &data) { return data.dt == DataType::F32; }, - REGISTER_FP32_NEON(arm_compute::cpu::directconv3d_float_neon_ndhwc)}, + REGISTER_FP32_NEON(directconv3d_fp32_neon_ndhwc)}, {"neon_qasymm8_directconv3d", [](const DataTypeISASelectorData &data) { return data.dt == DataType::QASYMM8; }, - REGISTER_QASYMM8_NEON(arm_compute::cpu::directconv3d_quantized_neon_ndhwc)}, + REGISTER_QASYMM8_NEON(directconv3d_qu8_neon_ndhwc)}, {"neon_qasymm8_signed_directconv3d", [](const DataTypeISASelectorData &data) { return data.dt == DataType::QASYMM8_SIGNED; }, - REGISTER_QASYMM8_SIGNED_NEON(arm_compute::cpu::directconv3d_quantized_neon_ndhwc)}}; + REGISTER_QASYMM8_SIGNED_NEON(directconv3d_qs8_neon_ndhwc)}}; Status validate_arguments(const ITensorInfo 
*src0, const ITensorInfo *src1, diff --git a/src/cpu/kernels/CpuGemmLowpMatrixMultiplyKernel.cpp b/src/cpu/kernels/CpuGemmLowpMatrixMultiplyKernel.cpp index a3ed2cd171..5b88735e7a 100644 --- a/src/cpu/kernels/CpuGemmLowpMatrixMultiplyKernel.cpp +++ b/src/cpu/kernels/CpuGemmLowpMatrixMultiplyKernel.cpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2017-2021 Arm Limited. + * Copyright (c) 2017-2021, 2024 Arm Limited. * * SPDX-License-Identifier: MIT * diff --git a/src/cpu/kernels/CpuPermuteKernel.cpp b/src/cpu/kernels/CpuPermuteKernel.cpp index b444a25ff7..c6e0dd3a5e 100644 --- a/src/cpu/kernels/CpuPermuteKernel.cpp +++ b/src/cpu/kernels/CpuPermuteKernel.cpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2018-2021 Arm Limited. + * Copyright (c) 2018-2021, 2024 Arm Limited. * * SPDX-License-Identifier: MIT * @@ -97,15 +97,12 @@ Status validate_arguments(const ITensorInfo *src, const ITensorInfo *dst, const template void run_permute(const Window &window, const ITensor *src, const ITensor *dst, const PermutationVector &perm) { - const DataLayout src_layout = src->info()->data_layout(); - // Source window Window window_src = window; // we only support these two configs in src/core/NEON/kernels/convolution/common/shims.hpp, for all others // we have to fall back to C++ - if ((src_layout == DataLayout::NCHW && perm == PermutationVector{2U, 0U, 1U}) || - (src_layout == DataLayout::NHWC && perm == PermutationVector{1U, 2U, 0U})) + if (perm == PermutationVector{2U, 0U, 1U} || perm == PermutationVector{1U, 2U, 0U}) { window_src.set(Window::DimX, Window::Dimension(window.x().start(), window.x().end(), window.x().end() - window.x().start())); @@ -128,49 +125,16 @@ void run_permute(const Window &window, const ITensor *src, const ITensor *dst, c Iterator src_it(src, window_src); Iterator dst_it(dst, window_dst); - int in_row_stride = 0; - int in_col_stride = 0; - int in_channel_stride = 0; - int in_batch_stride = 0; - int n_cols = 0; - int n_rows = 0; - int n_channels = 0; - int n_batches = 0; - - 
switch (src_layout) - { - case DataLayout::NCHW: - { - in_row_stride = src->info()->strides_in_bytes().y() / sizeof(T); - in_channel_stride = src->info()->strides_in_bytes().z() / sizeof(T); - in_batch_stride = src->info()->strides_in_bytes()[3] / sizeof(T); - n_cols = src->info()->tensor_shape().x(); - n_rows = window_src.y().step(); - n_channels = src->info()->tensor_shape().z(); - n_batches = src->info()->tensor_shape()[3]; - break; - } - case DataLayout::NHWC: - { - in_col_stride = src->info()->strides_in_bytes().y() / sizeof(T); - in_row_stride = src->info()->strides_in_bytes().z() / sizeof(T); - in_batch_stride = src->info()->strides_in_bytes()[3] / sizeof(T); - n_channels = src->info()->tensor_shape().x(); - n_cols = window_src.y().step(); - n_rows = src->info()->tensor_shape().z(); - n_batches = src->info()->tensor_shape()[3]; - break; - } - default: - { - ARM_COMPUTE_ERROR("Invalid source data layout."); - break; - } - } - // CHW -> HWC - if (src_layout == DataLayout::NCHW && perm == PermutationVector{2U, 0U, 1U}) + if (perm == PermutationVector{2U, 0U, 1U}) { + const int in_row_stride = src->info()->strides_in_bytes().y() / sizeof(T); + const int in_channel_stride = src->info()->strides_in_bytes().z() / sizeof(T); + const int in_batch_stride = src->info()->strides_in_bytes()[3] / sizeof(T); + const int n_cols = src->info()->tensor_shape().x(); + const int n_rows = window_src.y().step(); + const int n_channels = src->info()->tensor_shape().z(); + const int n_batches = src->info()->tensor_shape()[3]; const int out_channel_stride = dst->info()->strides_in_bytes().x() / sizeof(T); const int out_col_stride = dst->info()->strides_in_bytes().y() / sizeof(T); const int out_row_stride = dst->info()->strides_in_bytes().z() / sizeof(T); @@ -188,8 +152,15 @@ void run_permute(const Window &window, const ITensor *src, const ITensor *dst, c src_it, dst_it); } // HWC -> CHW - else if (src_layout == DataLayout::NHWC && perm == PermutationVector{1U, 2U, 0U}) + else if 
(perm == PermutationVector{1U, 2U, 0U}) { + const int in_col_stride = src->info()->strides_in_bytes().y() / sizeof(T); + const int in_row_stride = src->info()->strides_in_bytes().z() / sizeof(T); + const int in_batch_stride = src->info()->strides_in_bytes()[3] / sizeof(T); + const int n_channels = src->info()->tensor_shape().x(); + const int n_cols = window_src.y().step(); + const int n_rows = src->info()->tensor_shape().z(); + const int n_batches = src->info()->tensor_shape()[3]; const int out_col_stride = dst->info()->strides_in_bytes().x() / sizeof(T); const int out_row_stride = dst->info()->strides_in_bytes().y() / sizeof(T); const int out_channel_stride = dst->info()->strides_in_bytes().z() / sizeof(T); diff --git a/src/cpu/kernels/assembly/CpuGemmAssemblyWrapperKernel.h b/src/cpu/kernels/assembly/CpuGemmAssemblyWrapperKernel.h index 6e8f32ef47..e2a27675b3 100644 --- a/src/cpu/kernels/assembly/CpuGemmAssemblyWrapperKernel.h +++ b/src/cpu/kernels/assembly/CpuGemmAssemblyWrapperKernel.h @@ -1,5 +1,5 @@ /* - * Copyright (c) 2018-2022 Arm Limited. + * Copyright (c) 2018-2022, 2024 Arm Limited. * * SPDX-License-Identifier: MIT * @@ -21,8 +21,8 @@ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE * SOFTWARE. 
*/ -#ifndef ARM_COMPUTE_ASSEMBLY_GEMM_KERNEL_WRAPPER_KERNEL_H -#define ARM_COMPUTE_ASSEMBLY_GEMM_KERNEL_WRAPPER_KERNEL_H +#ifndef ACL_SRC_CPU_KERNELS_ASSEMBLY_CPUGEMMASSEMBLYWRAPPERKERNEL_H +#define ACL_SRC_CPU_KERNELS_ASSEMBLY_CPUGEMMASSEMBLYWRAPPERKERNEL_H #include "arm_compute/core/Utils.h" #include "arm_compute/core/Validate.h" @@ -137,4 +137,4 @@ class CpuGemmAssemblyWrapperKernel final : public INEKernel } // namespace kernel } // namespace cpu } // namespace arm_compute -#endif /* ARM_COMPUTE_ASSEMBLY_GEMM_KERNEL_WRAPPER_KERNEL_H */ +#endif // ACL_SRC_CPU_KERNELS_ASSEMBLY_CPUGEMMASSEMBLYWRAPPERKERNEL_H diff --git a/src/cpu/kernels/assembly/convolution_parameters.hpp b/src/cpu/kernels/assembly/convolution_parameters.hpp index 0c1ae58902..a6cf96344c 100644 --- a/src/cpu/kernels/assembly/convolution_parameters.hpp +++ b/src/cpu/kernels/assembly/convolution_parameters.hpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2018-2021 Arm Limited. + * Copyright (c) 2018-2021, 2024 Arm Limited. * * SPDX-License-Identifier: MIT * @@ -21,6 +21,10 @@ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE * SOFTWARE. */ + +#ifndef ACL_SRC_CPU_KERNELS_ASSEMBLY_CONVOLUTION_PARAMETERS_HPP +#define ACL_SRC_CPU_KERNELS_ASSEMBLY_CONVOLUTION_PARAMETERS_HPP + #pragma once #include @@ -63,3 +67,5 @@ struct ConvolutionParameters }; } // namespace arm_gemm + +#endif // ACL_SRC_CPU_KERNELS_ASSEMBLY_CONVOLUTION_PARAMETERS_HPP diff --git a/src/cpu/kernels/conv3d/neon/list.h b/src/cpu/kernels/conv3d/generic/neon/float_impl.h similarity index 96% rename from src/cpu/kernels/conv3d/neon/list.h rename to src/cpu/kernels/conv3d/generic/neon/float_impl.h index 082c60be29..5b5611a02f 100644 --- a/src/cpu/kernels/conv3d/neon/list.h +++ b/src/cpu/kernels/conv3d/generic/neon/float_impl.h @@ -1,5 +1,5 @@ /* - * Copyright (c) 2021 Arm Limited. + * Copyright (c) 2021, 2024 Arm Limited. 
* * SPDX-License-Identifier: MIT * @@ -21,21 +21,25 @@ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE * SOFTWARE. */ -#ifndef SRC_CORE_NEON_KERNELS_CONV3D_LIST_H -#define SRC_CORE_NEON_KERNELS_CONV3D_LIST_H +#ifndef ACL_SRC_CPU_KERNELS_CONV3D_GENERIC_NEON_FLOAT_IMPL_H +#define ACL_SRC_CPU_KERNELS_CONV3D_GENERIC_NEON_FLOAT_IMPL_H +#include "arm_compute/core/Helpers.h" +#include "arm_compute/core/ITensor.h" #include "arm_compute/core/Types.h" -#include "arm_compute/core/utils/misc/Traits.h" +#include "arm_compute/core/Window.h" #include "arm_compute/runtime/FunctionDescriptors.h" #include "src/core/helpers/WindowHelpers.h" #include "src/core/NEON/wrapper/wrapper.h" -#include "src/cpu/kernels/conv3d/neon/quantized.h" namespace arm_compute { namespace cpu { +namespace kernels +{ + template void directconv3d_float_neon_ndhwc(const ITensor *src0, const ITensor *src1, @@ -192,6 +196,7 @@ void directconv3d_float_neon_ndhwc(const ITensor *src0, out); } +} // namespace kernels } // namespace cpu } // namespace arm_compute -#endif // SRC_CORE_NEON_KERNELS_CONV3D_LIST_H +#endif // ACL_SRC_CPU_KERNELS_CONV3D_GENERIC_NEON_FLOAT_IMPL_H diff --git a/src/cpu/kernels/conv3d/generic/neon/fp16.cpp b/src/cpu/kernels/conv3d/generic/neon/fp16.cpp new file mode 100644 index 0000000000..1737556e51 --- /dev/null +++ b/src/cpu/kernels/conv3d/generic/neon/fp16.cpp @@ -0,0 +1,49 @@ +/* + * Copyright (c) 2024 Arm Limited. 
+ * + * SPDX-License-Identifier: MIT + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal in the Software without restriction, including without limitation the + * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + * sell copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. 
+ */ + +#include "src/cpu/kernels/conv3d/generic/neon/float_impl.h" + +#if defined(__ARM_FEATURE_FP16_VECTOR_ARITHMETIC) && defined(ENABLE_FP16_KERNELS) + +namespace arm_compute +{ +namespace cpu +{ +namespace kernels +{ +void directconv3d_fp16_neon_ndhwc(const ITensor *src0, + const ITensor *src1, + const ITensor *src2, + ITensor *dst, + const Conv3dInfo &conv_info, + const Window &window) +{ + directconv3d_float_neon_ndhwc(src0, src1, src2, dst, conv_info, window); +} + +} // namespace kernels +} // namespace cpu +} // namespace arm_compute + +#endif // defined(__ARM_FEATURE_FP16_VECTOR_ARITHMETIC) && defined(ENABLE_FP16_KERNELS) diff --git a/src/cpu/kernels/conv3d/generic/neon/fp32.cpp b/src/cpu/kernels/conv3d/generic/neon/fp32.cpp new file mode 100644 index 0000000000..1cd0793442 --- /dev/null +++ b/src/cpu/kernels/conv3d/generic/neon/fp32.cpp @@ -0,0 +1,46 @@ +/* + * Copyright (c) 2024 Arm Limited. + * + * SPDX-License-Identifier: MIT + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal in the Software without restriction, including without limitation the + * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + * sell copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 
IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + */ + +#include "src/cpu/kernels/conv3d/generic/neon/float_impl.h" + +namespace arm_compute +{ +namespace cpu +{ +namespace kernels +{ + +void directconv3d_fp32_neon_ndhwc(const ITensor *src0, + const ITensor *src1, + const ITensor *src2, + ITensor *dst, + const Conv3dInfo &conv_info, + const Window &window) +{ + directconv3d_float_neon_ndhwc(src0, src1, src2, dst, conv_info, window); +} + +} // namespace kernels +} // namespace cpu +} // namespace arm_compute diff --git a/src/cpu/kernels/conv3d/generic/neon/qasymm8.cpp b/src/cpu/kernels/conv3d/generic/neon/qasymm8.cpp new file mode 100644 index 0000000000..d0cb6fc1c1 --- /dev/null +++ b/src/cpu/kernels/conv3d/generic/neon/qasymm8.cpp @@ -0,0 +1,46 @@ +/* + * Copyright (c) 2024 Arm Limited. + * + * SPDX-License-Identifier: MIT + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal in the Software without restriction, including without limitation the + * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + * sell copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 
IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + */ + +#include "src/cpu/kernels/conv3d/generic/neon/quantized_impl.h" + +namespace arm_compute +{ +namespace cpu +{ +namespace kernels +{ + +void directconv3d_qu8_neon_ndhwc(const ITensor *src0, + const ITensor *src1, + const ITensor *src2, + ITensor *dst, + const Conv3dInfo &conv_info, + const Window &window) +{ + directconv3d_quantized_neon_ndhwc(src0, src1, src2, dst, conv_info, window); +} + +} // namespace kernels +} // namespace cpu +} // namespace arm_compute diff --git a/src/cpu/kernels/conv3d/generic/neon/qasymm8_signed.cpp b/src/cpu/kernels/conv3d/generic/neon/qasymm8_signed.cpp new file mode 100644 index 0000000000..adffc1a3f8 --- /dev/null +++ b/src/cpu/kernels/conv3d/generic/neon/qasymm8_signed.cpp @@ -0,0 +1,46 @@ +/* + * Copyright (c) 2024 Arm Limited. + * + * SPDX-License-Identifier: MIT + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal in the Software without restriction, including without limitation the + * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + * sell copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 
IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + */ + +#include "src/cpu/kernels/conv3d/generic/neon/quantized_impl.h" + +namespace arm_compute +{ +namespace cpu +{ +namespace kernels +{ + +void directconv3d_qs8_neon_ndhwc(const ITensor *src0, + const ITensor *src1, + const ITensor *src2, + ITensor *dst, + const Conv3dInfo &conv_info, + const Window &window) +{ + directconv3d_quantized_neon_ndhwc(src0, src1, src2, dst, conv_info, window); +} + +} // namespace kernels +} // namespace cpu +} // namespace arm_compute diff --git a/src/cpu/kernels/conv3d/neon/quantized.h b/src/cpu/kernels/conv3d/generic/neon/quantized_impl.h similarity index 98% rename from src/cpu/kernels/conv3d/neon/quantized.h rename to src/cpu/kernels/conv3d/generic/neon/quantized_impl.h index f0fc9b5a71..b6b41035f8 100644 --- a/src/cpu/kernels/conv3d/neon/quantized.h +++ b/src/cpu/kernels/conv3d/generic/neon/quantized_impl.h @@ -1,5 +1,5 @@ /* - * Copyright (c) 2021 Arm Limited. + * Copyright (c) 2021, 2024 Arm Limited. * * SPDX-License-Identifier: MIT * @@ -21,9 +21,10 @@ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE * SOFTWARE. 
*/ -#ifndef SRC_CORE_NEON_KERNELS_CONV3D_QUANTIZED_H -#define SRC_CORE_NEON_KERNELS_CONV3D_QUANTIZED_H +#ifndef ACL_SRC_CPU_KERNELS_CONV3D_GENERIC_NEON_QUANTIZED_IMPL_H +#define ACL_SRC_CPU_KERNELS_CONV3D_GENERIC_NEON_QUANTIZED_IMPL_H +#include "arm_compute/core/Helpers.h" #include "arm_compute/core/Types.h" #include "arm_compute/core/utils/misc/Traits.h" #include "arm_compute/core/utils/quantization/AsymmHelpers.h" @@ -37,6 +38,8 @@ namespace arm_compute { namespace cpu { +namespace kernels +{ template void directconv3d_quantized_neon_ndhwc(const ITensor *src0, const ITensor *src1, @@ -270,6 +273,7 @@ void directconv3d_quantized_neon_ndhwc(const ITensor *src0, }, out); } +} // namespace kernels } // namespace cpu } // namespace arm_compute -#endif // SRC_CORE_NEON_KERNELS_CONV3D_QUANTIZED_H +#endif // ACL_SRC_CPU_KERNELS_CONV3D_GENERIC_NEON_QUANTIZED_IMPL_H diff --git a/src/cpu/kernels/conv3d/list.h b/src/cpu/kernels/conv3d/list.h new file mode 100644 index 0000000000..256d28825d --- /dev/null +++ b/src/cpu/kernels/conv3d/list.h @@ -0,0 +1,47 @@ +/* + * Copyright (c) 2024 Arm Limited. + * + * SPDX-License-Identifier: MIT + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal in the Software without restriction, including without limitation the + * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + * sell copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 
IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + */ +#ifndef ACL_SRC_CPU_KERNELS_CONV3D_LIST_H +#define ACL_SRC_CPU_KERNELS_CONV3D_LIST_H + +namespace arm_compute +{ +namespace cpu +{ +namespace kernels +{ + +#define DECLARE_CONV3D_KERNEL(func_name) \ + void func_name(const ITensor *src0, const ITensor *src1, const ITensor *src2, ITensor *dst, \ + const Conv3dInfo &conv_info, const Window &window) + +DECLARE_CONV3D_KERNEL(directconv3d_fp16_neon_ndhwc); +DECLARE_CONV3D_KERNEL(directconv3d_fp32_neon_ndhwc); +DECLARE_CONV3D_KERNEL(directconv3d_qu8_neon_ndhwc); +DECLARE_CONV3D_KERNEL(directconv3d_qs8_neon_ndhwc); +#undef DECLARE_CONV3D_KERNEL + +} // namespace kernels +} // namespace cpu +} // namespace arm_compute +#endif // ACL_SRC_CPU_KERNELS_CONV3D_LIST_H diff --git a/src/cpu/kernels/elementwise_binary/generic/sve/impl.cpp b/src/cpu/kernels/elementwise_binary/generic/sve/impl.cpp index fa48407e9b..52668a6b5c 100644 --- a/src/cpu/kernels/elementwise_binary/generic/sve/impl.cpp +++ b/src/cpu/kernels/elementwise_binary/generic/sve/impl.cpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2021-2022 Arm Limited. + * Copyright (c) 2021-2022, 2024 Arm Limited. 
* * SPDX-License-Identifier: MIT * @@ -280,12 +280,6 @@ svint32_t elementwise_pow(svbool_t &pg, const svint32_t &a, const svi return svcvt_s32_z(pg, svpow_z(pg, svcvt_f32_z(pg, a), svcvt_f32_z(pg, b))); } -template <> -svint32_t elementwise_div(svbool_t &pg, const svint32_t &a, const svint32_t &b) -{ - return svcvt_s32_z(pg, svdiv_z(pg, svcvt_f32_z(pg, a), svcvt_f32_z(pg, b))); -} - template <> svint16_t elementwise_div(svbool_t &pg, const svint16_t &a, const svint16_t &b) { diff --git a/src/cpu/kernels/gemm_matrix_mul/generic/neon/impl.cpp b/src/cpu/kernels/gemm_matrix_mul/generic/neon/impl.cpp index 404d070a37..580fdc3e8f 100644 --- a/src/cpu/kernels/gemm_matrix_mul/generic/neon/impl.cpp +++ b/src/cpu/kernels/gemm_matrix_mul/generic/neon/impl.cpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2017-2023 Arm Limited. + * Copyright (c) 2017-2024 Arm Limited. * * SPDX-License-Identifier: MIT * @@ -81,7 +81,7 @@ void vector_matrix_multiply_f32( // window_end_x is computed above which may cause out-of-bound writes to the dst. 
for (; x < (window_end_x - window_step_x); x += window_step_x) { - if (x > width_matrix_b) + if (x >= width_matrix_b) { return; } @@ -203,7 +203,7 @@ void vector_matrix_multiply_f32( // Left-over loop for (; x < window_end_x; ++x) { - if (x > width_matrix_b) + if (x >= width_matrix_b) { return; } @@ -309,9 +309,21 @@ void matrix_matrix_multiply_f32( Iterator inb(rhs, win_b); Iterator out(dst, window); - const bool multiply_alpha = !(helpers::float_ops::is_one(alpha)); + // End address of matrix B at batch number n + const float *end_addr_mtx_b_at_batch_n = + reinterpret_cast(inb.ptr()) + rhs->info()->dimension(0) * rhs->info()->dimension(1); + std::vector end_addr_mtx_b_per_batch = {}; + const bool multiply_alpha = !(helpers::float_ops::is_one(alpha)); + const float32x4_t alpha_f32 = vdupq_n_f32(alpha); + const size_t out_dim2 = static_cast(dst->info()->dimension(2)); - const float32x4_t alpha_f32 = vdupq_n_f32(alpha); + for (size_t b = 0; b < out_dim2; ++b) + { + // Store the ptrs to the last elem in the tensor for each batch + end_addr_mtx_b_per_batch.push_back(end_addr_mtx_b_at_batch_n); + end_addr_mtx_b_at_batch_n += + rhs->info()->dimension(2) != 1 ? 
rhs->info()->dimension(0) * rhs->info()->dimension(1) : 0; + } // The implementation assumes that the matrix A and Matrix B have been reshaped respectively with CpuGemmInterleave4x4 and CpuGemmTranspose1xW // The reshaping of the matrices helps to have a cache friendly implementation and helps to avoid the data re-arrangements needed for computing 16x4 elements per iteration @@ -341,220 +353,374 @@ void matrix_matrix_multiply_f32( #endif /* __arm__ */ auto mtx_b0_end_addr = mtx_b0 + num_elems_matrix_b_x; - for (; mtx_b0 <= (mtx_b0_end_addr - 32);) + + ARM_COMPUTE_ERROR_ON(end_addr_mtx_b_per_batch.size() == 0); + if (mtx_b1 < end_addr_mtx_b_per_batch[id.z()]) { - float32x4_t a0 = vld1q_dup_f32(mtx_a0 + 0); - float32x4_t a1 = vld1q_dup_f32(mtx_a0 + 1); - float32x4_t a2 = vld1q_dup_f32(mtx_a0 + 2); - float32x4_t a3 = vld1q_dup_f32(mtx_a0 + 3); + for (; mtx_b0 < (mtx_b0_end_addr - 32);) + { + float32x4_t a0 = vld1q_dup_f32(mtx_a0 + 0); + float32x4_t a1 = vld1q_dup_f32(mtx_a0 + 1); + float32x4_t a2 = vld1q_dup_f32(mtx_a0 + 2); + float32x4_t a3 = vld1q_dup_f32(mtx_a0 + 3); - float32x4_t b00 = vld1q_f32(mtx_b0); - float32x4_t b10 = vld1q_f32(mtx_b1); - float32x4_t b01 = vld1q_f32(mtx_b0 + 4); - float32x4_t b11 = vld1q_f32(mtx_b1 + 4); + float32x4_t b00 = vld1q_f32(mtx_b0); + float32x4_t b10 = vld1q_f32(mtx_b1); + float32x4_t b01 = vld1q_f32(mtx_b0 + 4); + float32x4_t b11 = vld1q_f32(mtx_b1 + 4); #if __arm__ - asm volatile("PLD [%0, #128*4]" ::"r"(reinterpret_cast(mtx_a0))); - asm volatile("PLD [%0, #128*4]" ::"r"(reinterpret_cast(mtx_b0))); - asm volatile("PLD [%0, #128*4]" ::"r"(reinterpret_cast(mtx_b1))); + asm volatile("PLD [%0, #128*4]" ::"r"(reinterpret_cast(mtx_a0))); + asm volatile("PLD [%0, #128*4]" ::"r"(reinterpret_cast(mtx_b0))); + asm volatile("PLD [%0, #128*4]" ::"r"(reinterpret_cast(mtx_b1))); #endif /* __arm__ */ - // 4x4 block 0 - acc00 = vmlaq_f32(acc00, b00, a0); - acc10 = vmlaq_f32(acc10, b00, a1); - acc20 = vmlaq_f32(acc20, b00, a2); - acc30 = 
vmlaq_f32(acc30, b00, a3); - - float32x4_t a4 = vld1q_dup_f32(mtx_a0 + 4); - float32x4_t a5 = vld1q_dup_f32(mtx_a0 + 5); - float32x4_t a6 = vld1q_dup_f32(mtx_a0 + 6); - float32x4_t a7 = vld1q_dup_f32(mtx_a0 + 7); - - // 4x4 block 1 - acc01 = vmlaq_f32(acc01, b10, a0); - acc11 = vmlaq_f32(acc11, b10, a1); - acc21 = vmlaq_f32(acc21, b10, a2); - acc31 = vmlaq_f32(acc31, b10, a3); - - // 4x4 block 0 - acc00 = vmlaq_f32(acc00, b01, a4); - acc10 = vmlaq_f32(acc10, b01, a5); - acc20 = vmlaq_f32(acc20, b01, a6); - acc30 = vmlaq_f32(acc30, b01, a7); - - // 4x4 block 1 - acc01 = vmlaq_f32(acc01, b11, a4); - acc11 = vmlaq_f32(acc11, b11, a5); - acc21 = vmlaq_f32(acc21, b11, a6); - acc31 = vmlaq_f32(acc31, b11, a7); - - mtx_a0 += 8; - mtx_b0 += 8; - mtx_b1 += 8; - - a0 = vld1q_dup_f32(mtx_a0 + 0); - a1 = vld1q_dup_f32(mtx_a0 + 1); - a2 = vld1q_dup_f32(mtx_a0 + 2); - a3 = vld1q_dup_f32(mtx_a0 + 3); - - b00 = vld1q_f32(mtx_b0); - b10 = vld1q_f32(mtx_b1); - b01 = vld1q_f32(mtx_b0 + 4); - b11 = vld1q_f32(mtx_b1 + 4); - - // 4x4 block 0 - acc00 = vmlaq_f32(acc00, b00, a0); - acc10 = vmlaq_f32(acc10, b00, a1); - acc20 = vmlaq_f32(acc20, b00, a2); - acc30 = vmlaq_f32(acc30, b00, a3); - - a4 = vld1q_dup_f32(mtx_a0 + 4); - a5 = vld1q_dup_f32(mtx_a0 + 5); - a6 = vld1q_dup_f32(mtx_a0 + 6); - a7 = vld1q_dup_f32(mtx_a0 + 7); - - // 4x4 block 1 - acc01 = vmlaq_f32(acc01, b10, a0); - acc11 = vmlaq_f32(acc11, b10, a1); - acc21 = vmlaq_f32(acc21, b10, a2); - acc31 = vmlaq_f32(acc31, b10, a3); - - // 4x4 block 0 - acc00 = vmlaq_f32(acc00, b01, a4); - acc10 = vmlaq_f32(acc10, b01, a5); - acc20 = vmlaq_f32(acc20, b01, a6); - acc30 = vmlaq_f32(acc30, b01, a7); - - // 4x4 block 1 - acc01 = vmlaq_f32(acc01, b11, a4); - acc11 = vmlaq_f32(acc11, b11, a5); - acc21 = vmlaq_f32(acc21, b11, a6); - acc31 = vmlaq_f32(acc31, b11, a7); - - mtx_a0 += 8; - mtx_b0 += 8; - mtx_b1 += 8; - - a0 = vld1q_dup_f32(mtx_a0 + 0); - a1 = vld1q_dup_f32(mtx_a0 + 1); - a2 = vld1q_dup_f32(mtx_a0 + 2); - a3 = 
vld1q_dup_f32(mtx_a0 + 3); - b00 = vld1q_f32(mtx_b0); - b10 = vld1q_f32(mtx_b1); - b01 = vld1q_f32(mtx_b0 + 4); - b11 = vld1q_f32(mtx_b1 + 4); + // 4x4 block 0 + acc00 = vmlaq_f32(acc00, b00, a0); + acc10 = vmlaq_f32(acc10, b00, a1); + acc20 = vmlaq_f32(acc20, b00, a2); + acc30 = vmlaq_f32(acc30, b00, a3); + + float32x4_t a4 = vld1q_dup_f32(mtx_a0 + 4); + float32x4_t a5 = vld1q_dup_f32(mtx_a0 + 5); + float32x4_t a6 = vld1q_dup_f32(mtx_a0 + 6); + float32x4_t a7 = vld1q_dup_f32(mtx_a0 + 7); + + // 4x4 block 1 + acc01 = vmlaq_f32(acc01, b10, a0); + acc11 = vmlaq_f32(acc11, b10, a1); + acc21 = vmlaq_f32(acc21, b10, a2); + acc31 = vmlaq_f32(acc31, b10, a3); + + // 4x4 block 0 + acc00 = vmlaq_f32(acc00, b01, a4); + acc10 = vmlaq_f32(acc10, b01, a5); + acc20 = vmlaq_f32(acc20, b01, a6); + acc30 = vmlaq_f32(acc30, b01, a7); + + // 4x4 block 1 + acc01 = vmlaq_f32(acc01, b11, a4); + acc11 = vmlaq_f32(acc11, b11, a5); + acc21 = vmlaq_f32(acc21, b11, a6); + acc31 = vmlaq_f32(acc31, b11, a7); + + mtx_a0 += 8; + mtx_b0 += 8; + mtx_b1 += 8; + + a0 = vld1q_dup_f32(mtx_a0 + 0); + a1 = vld1q_dup_f32(mtx_a0 + 1); + a2 = vld1q_dup_f32(mtx_a0 + 2); + a3 = vld1q_dup_f32(mtx_a0 + 3); + + b00 = vld1q_f32(mtx_b0); + b10 = vld1q_f32(mtx_b1); + b01 = vld1q_f32(mtx_b0 + 4); + b11 = vld1q_f32(mtx_b1 + 4); + + // 4x4 block 0 + acc00 = vmlaq_f32(acc00, b00, a0); + acc10 = vmlaq_f32(acc10, b00, a1); + acc20 = vmlaq_f32(acc20, b00, a2); + acc30 = vmlaq_f32(acc30, b00, a3); + + a4 = vld1q_dup_f32(mtx_a0 + 4); + a5 = vld1q_dup_f32(mtx_a0 + 5); + a6 = vld1q_dup_f32(mtx_a0 + 6); + a7 = vld1q_dup_f32(mtx_a0 + 7); + + // 4x4 block 1 + acc01 = vmlaq_f32(acc01, b10, a0); + acc11 = vmlaq_f32(acc11, b10, a1); + acc21 = vmlaq_f32(acc21, b10, a2); + acc31 = vmlaq_f32(acc31, b10, a3); + + // 4x4 block 0 + acc00 = vmlaq_f32(acc00, b01, a4); + acc10 = vmlaq_f32(acc10, b01, a5); + acc20 = vmlaq_f32(acc20, b01, a6); + acc30 = vmlaq_f32(acc30, b01, a7); + + // 4x4 block 1 + acc01 = vmlaq_f32(acc01, b11, a4); + 
acc11 = vmlaq_f32(acc11, b11, a5); + acc21 = vmlaq_f32(acc21, b11, a6); + acc31 = vmlaq_f32(acc31, b11, a7); + + mtx_a0 += 8; + mtx_b0 += 8; + mtx_b1 += 8; + + a0 = vld1q_dup_f32(mtx_a0 + 0); + a1 = vld1q_dup_f32(mtx_a0 + 1); + a2 = vld1q_dup_f32(mtx_a0 + 2); + a3 = vld1q_dup_f32(mtx_a0 + 3); + b00 = vld1q_f32(mtx_b0); + b10 = vld1q_f32(mtx_b1); + b01 = vld1q_f32(mtx_b0 + 4); + b11 = vld1q_f32(mtx_b1 + 4); #if __arm__ - asm volatile("PLD [%0, #128*4]" ::"r"(reinterpret_cast(mtx_a0))); - asm volatile("PLD [%0, #128*4]" ::"r"(reinterpret_cast(mtx_b0))); - asm volatile("PLD [%0, #128*4]" ::"r"(reinterpret_cast(mtx_b1))); + asm volatile("PLD [%0, #128*4]" ::"r"(reinterpret_cast(mtx_a0))); + asm volatile("PLD [%0, #128*4]" ::"r"(reinterpret_cast(mtx_b0))); + asm volatile("PLD [%0, #128*4]" ::"r"(reinterpret_cast(mtx_b1))); #endif /* __arm__ */ - // 4x4 block 0 - acc00 = vmlaq_f32(acc00, b00, a0); - acc10 = vmlaq_f32(acc10, b00, a1); - acc20 = vmlaq_f32(acc20, b00, a2); - acc30 = vmlaq_f32(acc30, b00, a3); - - a4 = vld1q_dup_f32(mtx_a0 + 4); - a5 = vld1q_dup_f32(mtx_a0 + 5); - a6 = vld1q_dup_f32(mtx_a0 + 6); - a7 = vld1q_dup_f32(mtx_a0 + 7); - - // 4x4 block 1 - acc01 = vmlaq_f32(acc01, b10, a0); - acc11 = vmlaq_f32(acc11, b10, a1); - acc21 = vmlaq_f32(acc21, b10, a2); - acc31 = vmlaq_f32(acc31, b10, a3); - - // 4x4 block 0 - acc00 = vmlaq_f32(acc00, b01, a4); - acc10 = vmlaq_f32(acc10, b01, a5); - acc20 = vmlaq_f32(acc20, b01, a6); - acc30 = vmlaq_f32(acc30, b01, a7); - - // 4x4 block 1 - acc01 = vmlaq_f32(acc01, b11, a4); - acc11 = vmlaq_f32(acc11, b11, a5); - acc21 = vmlaq_f32(acc21, b11, a6); - acc31 = vmlaq_f32(acc31, b11, a7); - - mtx_a0 += 8; - mtx_b0 += 8; - mtx_b1 += 8; - - a0 = vld1q_dup_f32(mtx_a0 + 0); - a1 = vld1q_dup_f32(mtx_a0 + 1); - a2 = vld1q_dup_f32(mtx_a0 + 2); - a3 = vld1q_dup_f32(mtx_a0 + 3); - b00 = vld1q_f32(mtx_b0); - b10 = vld1q_f32(mtx_b1); - b01 = vld1q_f32(mtx_b0 + 4); - b11 = vld1q_f32(mtx_b1 + 4); - - // 4x4 block 0 - acc00 = 
vmlaq_f32(acc00, b00, a0); - acc10 = vmlaq_f32(acc10, b00, a1); - acc20 = vmlaq_f32(acc20, b00, a2); - acc30 = vmlaq_f32(acc30, b00, a3); - - a4 = vld1q_dup_f32(mtx_a0 + 4); - a5 = vld1q_dup_f32(mtx_a0 + 5); - a6 = vld1q_dup_f32(mtx_a0 + 6); - a7 = vld1q_dup_f32(mtx_a0 + 7); - - // 4x4 block 1 - acc01 = vmlaq_f32(acc01, b10, a0); - acc11 = vmlaq_f32(acc11, b10, a1); - acc21 = vmlaq_f32(acc21, b10, a2); - acc31 = vmlaq_f32(acc31, b10, a3); - - // 4x4 block 0 - acc00 = vmlaq_f32(acc00, b01, a4); - acc10 = vmlaq_f32(acc10, b01, a5); - acc20 = vmlaq_f32(acc20, b01, a6); - acc30 = vmlaq_f32(acc30, b01, a7); - - // 4x4 block 1 - acc01 = vmlaq_f32(acc01, b11, a4); - acc11 = vmlaq_f32(acc11, b11, a5); - acc21 = vmlaq_f32(acc21, b11, a6); - acc31 = vmlaq_f32(acc31, b11, a7); - - mtx_a0 += 8; - mtx_b0 += 8; - mtx_b1 += 8; + // 4x4 block 0 + acc00 = vmlaq_f32(acc00, b00, a0); + acc10 = vmlaq_f32(acc10, b00, a1); + acc20 = vmlaq_f32(acc20, b00, a2); + acc30 = vmlaq_f32(acc30, b00, a3); + + a4 = vld1q_dup_f32(mtx_a0 + 4); + a5 = vld1q_dup_f32(mtx_a0 + 5); + a6 = vld1q_dup_f32(mtx_a0 + 6); + a7 = vld1q_dup_f32(mtx_a0 + 7); + + // 4x4 block 1 + acc01 = vmlaq_f32(acc01, b10, a0); + acc11 = vmlaq_f32(acc11, b10, a1); + acc21 = vmlaq_f32(acc21, b10, a2); + acc31 = vmlaq_f32(acc31, b10, a3); + + // 4x4 block 0 + acc00 = vmlaq_f32(acc00, b01, a4); + acc10 = vmlaq_f32(acc10, b01, a5); + acc20 = vmlaq_f32(acc20, b01, a6); + acc30 = vmlaq_f32(acc30, b01, a7); + + // 4x4 block 1 + acc01 = vmlaq_f32(acc01, b11, a4); + acc11 = vmlaq_f32(acc11, b11, a5); + acc21 = vmlaq_f32(acc21, b11, a6); + acc31 = vmlaq_f32(acc31, b11, a7); + + mtx_a0 += 8; + mtx_b0 += 8; + mtx_b1 += 8; + + a0 = vld1q_dup_f32(mtx_a0 + 0); + a1 = vld1q_dup_f32(mtx_a0 + 1); + a2 = vld1q_dup_f32(mtx_a0 + 2); + a3 = vld1q_dup_f32(mtx_a0 + 3); + b00 = vld1q_f32(mtx_b0); + b10 = vld1q_f32(mtx_b1); + b01 = vld1q_f32(mtx_b0 + 4); + b11 = vld1q_f32(mtx_b1 + 4); + + // 4x4 block 0 + acc00 = vmlaq_f32(acc00, b00, a0); + acc10 = 
vmlaq_f32(acc10, b00, a1); + acc20 = vmlaq_f32(acc20, b00, a2); + acc30 = vmlaq_f32(acc30, b00, a3); + + a4 = vld1q_dup_f32(mtx_a0 + 4); + a5 = vld1q_dup_f32(mtx_a0 + 5); + a6 = vld1q_dup_f32(mtx_a0 + 6); + a7 = vld1q_dup_f32(mtx_a0 + 7); + + // 4x4 block 1 + acc01 = vmlaq_f32(acc01, b10, a0); + acc11 = vmlaq_f32(acc11, b10, a1); + acc21 = vmlaq_f32(acc21, b10, a2); + acc31 = vmlaq_f32(acc31, b10, a3); + + // 4x4 block 0 + acc00 = vmlaq_f32(acc00, b01, a4); + acc10 = vmlaq_f32(acc10, b01, a5); + acc20 = vmlaq_f32(acc20, b01, a6); + acc30 = vmlaq_f32(acc30, b01, a7); + + // 4x4 block 1 + acc01 = vmlaq_f32(acc01, b11, a4); + acc11 = vmlaq_f32(acc11, b11, a5); + acc21 = vmlaq_f32(acc21, b11, a6); + acc31 = vmlaq_f32(acc31, b11, a7); + + mtx_a0 += 8; + mtx_b0 += 8; + mtx_b1 += 8; + } + + // Only consider one row from matrix b if subsequent row is out of boundary. + for (; mtx_b0 < mtx_b0_end_addr;) + { + float32x4_t a0 = vld1q_dup_f32(mtx_a0 + 0); + float32x4_t a1 = vld1q_dup_f32(mtx_a0 + 1); + float32x4_t a2 = vld1q_dup_f32(mtx_a0 + 2); + float32x4_t a3 = vld1q_dup_f32(mtx_a0 + 3); + float32x4_t b00 = vld1q_f32(mtx_b0); + float32x4_t b10 = vld1q_f32(mtx_b1); + +#if __arm__ + asm volatile("PLD [%0, #128*2]" ::"r"(reinterpret_cast(mtx_a0))); + asm volatile("PLD [%0, #128*2]" ::"r"(reinterpret_cast(mtx_b0))); + asm volatile("PLD [%0, #128*2]" ::"r"(reinterpret_cast(mtx_b1))); +#endif /* __arm__ */ + // 4x4 block 0 + acc00 = vmlaq_f32(acc00, b00, a0); + acc10 = vmlaq_f32(acc10, b00, a1); + acc20 = vmlaq_f32(acc20, b00, a2); + acc30 = vmlaq_f32(acc30, b00, a3); + + // 4x4 block 1 + acc01 = vmlaq_f32(acc01, b10, a0); + acc11 = vmlaq_f32(acc11, b10, a1); + acc21 = vmlaq_f32(acc21, b10, a2); + acc31 = vmlaq_f32(acc31, b10, a3); + + mtx_a0 += 4; + mtx_b0 += 4; + mtx_b1 += 4; + } } - for (; mtx_b0 < mtx_b0_end_addr;) + // Leftover last row in matrix b, in case of there are odd number of rows in matrix B + else if (mtx_b0 < end_addr_mtx_b_per_batch[id.z()]) { - float32x4_t a0 = 
vld1q_dup_f32(mtx_a0 + 0); - float32x4_t a1 = vld1q_dup_f32(mtx_a0 + 1); - float32x4_t a2 = vld1q_dup_f32(mtx_a0 + 2); - float32x4_t a3 = vld1q_dup_f32(mtx_a0 + 3); - float32x4_t b00 = vld1q_f32(mtx_b0); - float32x4_t b10 = vld1q_f32(mtx_b1); + for (; mtx_b0 < (mtx_b0_end_addr - 32);) + { + float32x4_t a0 = vld1q_dup_f32(mtx_a0 + 0); + float32x4_t a1 = vld1q_dup_f32(mtx_a0 + 1); + float32x4_t a2 = vld1q_dup_f32(mtx_a0 + 2); + float32x4_t a3 = vld1q_dup_f32(mtx_a0 + 3); + + float32x4_t b00 = vld1q_f32(mtx_b0); + float32x4_t b01 = vld1q_f32(mtx_b0 + 4); #if __arm__ - asm volatile("PLD [%0, #128*2]" ::"r"(reinterpret_cast(mtx_a0))); - asm volatile("PLD [%0, #128*2]" ::"r"(reinterpret_cast(mtx_b0))); - asm volatile("PLD [%0, #128*2]" ::"r"(reinterpret_cast(mtx_b1))); + asm volatile("PLD [%0, #128*4]" ::"r"(reinterpret_cast(mtx_a0))); + asm volatile("PLD [%0, #128*4]" ::"r"(reinterpret_cast(mtx_b0))); #endif /* __arm__ */ - // 4x4 block 0 - acc00 = vmlaq_f32(acc00, b00, a0); - acc10 = vmlaq_f32(acc10, b00, a1); - acc20 = vmlaq_f32(acc20, b00, a2); - acc30 = vmlaq_f32(acc30, b00, a3); - - // 4x4 block 1 - acc01 = vmlaq_f32(acc01, b10, a0); - acc11 = vmlaq_f32(acc11, b10, a1); - acc21 = vmlaq_f32(acc21, b10, a2); - acc31 = vmlaq_f32(acc31, b10, a3); - - mtx_a0 += 4; - mtx_b0 += 4; - mtx_b1 += 4; + + // 4x4 block 0 + acc00 = vmlaq_f32(acc00, b00, a0); + acc10 = vmlaq_f32(acc10, b00, a1); + acc20 = vmlaq_f32(acc20, b00, a2); + acc30 = vmlaq_f32(acc30, b00, a3); + + float32x4_t a4 = vld1q_dup_f32(mtx_a0 + 4); + float32x4_t a5 = vld1q_dup_f32(mtx_a0 + 5); + float32x4_t a6 = vld1q_dup_f32(mtx_a0 + 6); + float32x4_t a7 = vld1q_dup_f32(mtx_a0 + 7); + + // 4x4 block 0 + acc00 = vmlaq_f32(acc00, b01, a4); + acc10 = vmlaq_f32(acc10, b01, a5); + acc20 = vmlaq_f32(acc20, b01, a6); + acc30 = vmlaq_f32(acc30, b01, a7); + + mtx_a0 += 8; + mtx_b0 += 8; + + a0 = vld1q_dup_f32(mtx_a0 + 0); + a1 = vld1q_dup_f32(mtx_a0 + 1); + a2 = vld1q_dup_f32(mtx_a0 + 2); + a3 = vld1q_dup_f32(mtx_a0 + 3); 
+ + b00 = vld1q_f32(mtx_b0); + b01 = vld1q_f32(mtx_b0 + 4); + + // 4x4 block 0 + acc00 = vmlaq_f32(acc00, b00, a0); + acc10 = vmlaq_f32(acc10, b00, a1); + acc20 = vmlaq_f32(acc20, b00, a2); + acc30 = vmlaq_f32(acc30, b00, a3); + + a4 = vld1q_dup_f32(mtx_a0 + 4); + a5 = vld1q_dup_f32(mtx_a0 + 5); + a6 = vld1q_dup_f32(mtx_a0 + 6); + a7 = vld1q_dup_f32(mtx_a0 + 7); + + // 4x4 block 0 + acc00 = vmlaq_f32(acc00, b01, a4); + acc10 = vmlaq_f32(acc10, b01, a5); + acc20 = vmlaq_f32(acc20, b01, a6); + acc30 = vmlaq_f32(acc30, b01, a7); + + mtx_a0 += 8; + mtx_b0 += 8; + + a0 = vld1q_dup_f32(mtx_a0 + 0); + a1 = vld1q_dup_f32(mtx_a0 + 1); + a2 = vld1q_dup_f32(mtx_a0 + 2); + a3 = vld1q_dup_f32(mtx_a0 + 3); + b00 = vld1q_f32(mtx_b0); + b01 = vld1q_f32(mtx_b0 + 4); + +#if __arm__ + asm volatile("PLD [%0, #128*4]" ::"r"(reinterpret_cast(mtx_a0))); + asm volatile("PLD [%0, #128*4]" ::"r"(reinterpret_cast(mtx_b0))); +#endif /* __arm__ */ + + // 4x4 block 0 + acc00 = vmlaq_f32(acc00, b00, a0); + acc10 = vmlaq_f32(acc10, b00, a1); + acc20 = vmlaq_f32(acc20, b00, a2); + acc30 = vmlaq_f32(acc30, b00, a3); + + a4 = vld1q_dup_f32(mtx_a0 + 4); + a5 = vld1q_dup_f32(mtx_a0 + 5); + a6 = vld1q_dup_f32(mtx_a0 + 6); + a7 = vld1q_dup_f32(mtx_a0 + 7); + + // 4x4 block 0 + acc00 = vmlaq_f32(acc00, b01, a4); + acc10 = vmlaq_f32(acc10, b01, a5); + acc20 = vmlaq_f32(acc20, b01, a6); + acc30 = vmlaq_f32(acc30, b01, a7); + + mtx_a0 += 8; + mtx_b0 += 8; + + a0 = vld1q_dup_f32(mtx_a0 + 0); + a1 = vld1q_dup_f32(mtx_a0 + 1); + a2 = vld1q_dup_f32(mtx_a0 + 2); + a3 = vld1q_dup_f32(mtx_a0 + 3); + b00 = vld1q_f32(mtx_b0); + b01 = vld1q_f32(mtx_b0 + 4); + + // 4x4 block 0 + acc00 = vmlaq_f32(acc00, b00, a0); + acc10 = vmlaq_f32(acc10, b00, a1); + acc20 = vmlaq_f32(acc20, b00, a2); + acc30 = vmlaq_f32(acc30, b00, a3); + + a4 = vld1q_dup_f32(mtx_a0 + 4); + a5 = vld1q_dup_f32(mtx_a0 + 5); + a6 = vld1q_dup_f32(mtx_a0 + 6); + a7 = vld1q_dup_f32(mtx_a0 + 7); + + // 4x4 block 0 + acc00 = vmlaq_f32(acc00, b01, a4); + 
acc10 = vmlaq_f32(acc10, b01, a5); + acc20 = vmlaq_f32(acc20, b01, a6); + acc30 = vmlaq_f32(acc30, b01, a7); + + mtx_a0 += 8; + mtx_b0 += 8; + } + for (; mtx_b0 < mtx_b0_end_addr;) + { + float32x4_t a0 = vld1q_dup_f32(mtx_a0 + 0); + float32x4_t a1 = vld1q_dup_f32(mtx_a0 + 1); + float32x4_t a2 = vld1q_dup_f32(mtx_a0 + 2); + float32x4_t a3 = vld1q_dup_f32(mtx_a0 + 3); + float32x4_t b00 = vld1q_f32(mtx_b0); + +#if __arm__ + asm volatile("PLD [%0, #128*2]" ::"r"(reinterpret_cast(mtx_a0))); + asm volatile("PLD [%0, #128*2]" ::"r"(reinterpret_cast(mtx_b0))); +#endif /* __arm__ */ + // 4x4 block 0 + acc00 = vmlaq_f32(acc00, b00, a0); + acc10 = vmlaq_f32(acc10, b00, a1); + acc20 = vmlaq_f32(acc20, b00, a2); + acc30 = vmlaq_f32(acc30, b00, a3); + + mtx_a0 += 4; + mtx_b0 += 4; + } } // Multiply by the weight of matrix product (alpha) diff --git a/src/cpu/kernels/meanstddevnorm/generic/neon/fp16.cpp b/src/cpu/kernels/meanstddevnorm/generic/neon/fp16.cpp index 344b9df0c8..c73d1def6b 100644 --- a/src/cpu/kernels/meanstddevnorm/generic/neon/fp16.cpp +++ b/src/cpu/kernels/meanstddevnorm/generic/neon/fp16.cpp @@ -53,23 +53,24 @@ void mean_stddev_normalization(ITensor *input, ITensor *output, fl auto in_ptr = reinterpret_cast(input_itr.ptr()); auto out_ptr = reinterpret_cast(output_itr.ptr()); - float16x8_t sum_vec = vdupq_n_f16(static_cast(0.0f)); + float32x4x2_t sum_vec = {vdupq_n_f32(0.0f), vdupq_n_f32(0.0f)}; + float32x4_t sum_sq_vec = vdupq_n_f32(0.0f); for (; x <= (window_end_x - window_step_x); x += window_step_x) { float16x8_t data = vld1q_f16(in_ptr + x); - sum_vec = vaddq_f16(sum_vec, data); float32x4_t dl = vcvt_f32_f16(vget_low_f16(data)); float32x4_t dh = vcvt_f32_f16(vget_high_f16(data)); + sum_vec.val[0] = vaddq_f32(sum_vec.val[0], dl); + sum_vec.val[1] = vaddq_f32(sum_vec.val[1], dh); sum_sq_vec = vaddq_f32(sum_sq_vec, vmulq_f32(dl, dl)); sum_sq_vec = vaddq_f32(sum_sq_vec, vmulq_f32(dh, dh)); } - float32x4_t sum_carry_res = - 
vpaddq_f32(vcvt_f32_f16(vget_high_f16(sum_vec)), vcvt_f32_f16(vget_low_f16(sum_vec))); - float sum = vaddvq_f32(sum_carry_res); - float sum_sq = vaddvq_f32(sum_sq_vec); + float32x4_t sum_carry_res = vpaddq_f32(sum_vec.val[0], sum_vec.val[1]); + float sum = vaddvq_f32(sum_carry_res); + float sum_sq = vaddvq_f32(sum_sq_vec); // Compute left-over elements for (; x < window_end_x; ++x) diff --git a/src/cpu/operators/CpuConv2d.h b/src/cpu/operators/CpuConv2d.h index 71b9e15dc1..3f98e71896 100644 --- a/src/cpu/operators/CpuConv2d.h +++ b/src/cpu/operators/CpuConv2d.h @@ -1,5 +1,5 @@ /* - * Copyright (c) 2017-2021, 2023 Arm Limited. + * Copyright (c) 2017-2021, 2023-2024 Arm Limited. * * SPDX-License-Identifier: MIT * @@ -21,6 +21,10 @@ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE * SOFTWARE. */ + +#ifndef ACL_SRC_CPU_OPERATORS_CPUCONV2D_H +#define ACL_SRC_CPU_OPERATORS_CPUCONV2D_H + #include "arm_compute/function_info/ActivationLayerInfo.h" #include "src/core/common/Macros.h" @@ -167,3 +171,5 @@ class CpuConv2d : public ICpuOperator }; } // namespace cpu } // namespace arm_compute + +#endif // ACL_SRC_CPU_OPERATORS_CPUCONV2D_H diff --git a/src/cpu/operators/CpuGemm.cpp b/src/cpu/operators/CpuGemm.cpp index 905e86c185..c489b256b8 100644 --- a/src/cpu/operators/CpuGemm.cpp +++ b/src/cpu/operators/CpuGemm.cpp @@ -174,8 +174,8 @@ void CpuGemm::configure(const ITensorInfo *a, // Configure rhs transpose1xw kernel _transpose1xW_b_kernel = std::make_unique(); _transpose1xW_b_kernel->configure(b_to_use, &_tmp_b); - _aux_mem[Transposed1xWRHS] = - MemoryInfo(offset_int_vec(Transposed1xWRHS), MemoryLifetime::Persistent, _tmp_b.total_size()); + const auto lifetime = _reshape_b_only_on_first_run ? 
MemoryLifetime::Persistent : MemoryLifetime::Temporary; + _aux_mem[Transposed1xWRHS] = MemoryInfo(offset_int_vec(Transposed1xWRHS), lifetime, _tmp_b.total_size()); // Use a and b here instead of _tmp_a and _tmp_b because CpuGemmMatrixMultiplyKernel requires the original m,n,k in case of interleaved a and transposed1xw b const int m = a->dimension(1); diff --git a/src/cpu/operators/CpuGemmConv2d.cpp b/src/cpu/operators/CpuGemmConv2d.cpp index 55d950ff4a..f3b78f8885 100644 --- a/src/cpu/operators/CpuGemmConv2d.cpp +++ b/src/cpu/operators/CpuGemmConv2d.cpp @@ -589,8 +589,14 @@ void CpuGemmConv2d::configure(const ITensorInfo *src, // WeightsReshaped in prepare // Otherwise WeightsReshaped is the final transformation of weights and needs to persist bool gemm_trans_wei = _aux_mem[GemmAsmPretransposedRHS].size > 0; - gemm_trans_wei = _mm_gemm != nullptr ? _aux_mem[GemmTransposed1xWRHS].size > 0 : gemm_trans_wei; - gemm_trans_wei = _mm_gemmlowp != nullptr ? _aux_mem[GemmLowpTransposed1xWRHS].size > 0 : gemm_trans_wei; + if (_mm_gemm != nullptr) + { + gemm_trans_wei |= _aux_mem[GemmTransposed1xWRHS].size > 0; + } + if (_mm_gemmlowp != nullptr) + { + gemm_trans_wei |= _aux_mem[GemmLowpTransposed1xWRHS].size > 0; + } _aux_mem[WeightsReshaped] = MemoryInfo(offset_int_vec(WeightsReshaped), gemm_trans_wei ? MemoryLifetime::Prepare : MemoryLifetime::Persistent, diff --git a/src/cpu/operators/CpuGemmConv2d.h b/src/cpu/operators/CpuGemmConv2d.h index 48a0d11107..fa16ce860b 100644 --- a/src/cpu/operators/CpuGemmConv2d.h +++ b/src/cpu/operators/CpuGemmConv2d.h @@ -1,5 +1,5 @@ /* - * Copyright (c) 2021-2023 Arm Limited. + * Copyright (c) 2021-2024 Arm Limited. * * SPDX-License-Identifier: MIT * diff --git a/src/cpu/operators/CpuPermute.cpp b/src/cpu/operators/CpuPermute.cpp index 25acc92d00..2d4e009d51 100644 --- a/src/cpu/operators/CpuPermute.cpp +++ b/src/cpu/operators/CpuPermute.cpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2018-2021 Arm Limited. 
+ * Copyright (c) 2018-2021, 2024 Arm Limited. * * SPDX-License-Identifier: MIT * @@ -23,23 +23,91 @@ */ #include "src/cpu/operators/CpuPermute.h" +#include "arm_compute/core/CoreTypes.h" +#include "arm_compute/core/Error.h" +#include "arm_compute/core/ITensorInfo.h" + #include "src/common/utils/Log.h" +#include "src/cpu/kernels/CpuCopyKernel.h" #include "src/cpu/kernels/CpuPermuteKernel.h" +#include "src/cpu/kernels/CpuTransposeKernel.h" + +#include +#include +#include namespace arm_compute { namespace cpu { +namespace +{ +// Handle "No-op" cases +bool prefer_copy(const PermutationVector &v) +{ + static const std::array permutations = {{ + PermutationVector(0U), + PermutationVector(0U, 1U), + PermutationVector(0U, 1U, 2U), + PermutationVector(0U, 1U, 2U, 3U), + PermutationVector(0U, 1U, 2U, 3U, 4U), + PermutationVector(0U, 1U, 2U, 3U, 4U, 5U), + }}; + + return std::find(permutations.begin(), permutations.end(), v) != permutations.end(); +} + +// Transpose kernel is optimized for permuting the first two dimensions of a tensor +bool prefer_transpose(const PermutationVector &v) +{ + static const std::array permutations = {{ + PermutationVector(1U, 0U), + PermutationVector(1U, 0U, 2U), + PermutationVector(1U, 0U, 2U, 3U), + PermutationVector(1U, 0U, 2U, 3U, 4U), + PermutationVector(1U, 0U, 2U, 3U, 4U, 5U), + }}; + + return std::find(permutations.begin(), permutations.end(), v) != permutations.end(); +} +} // namespace + void CpuPermute::configure(const ITensorInfo *src, ITensorInfo *dst, const PermutationVector &perm) { ARM_COMPUTE_LOG_PARAMS(src, dst, perm); - auto k = std::make_unique(); - k->configure(src, dst, perm); - _kernel = std::move(k); + + if (prefer_copy(perm)) + { + auto k = std::make_unique(); + k->configure(src, dst); + _kernel = std::move(k); + } + else if (prefer_transpose(perm)) + { + auto k = std::make_unique(); + k->configure(src, dst); + _kernel = std::move(k); + } + else + { + auto k = std::make_unique(); + k->configure(src, dst, perm); + _kernel 
= std::move(k); + } } Status CpuPermute::validate(const ITensorInfo *src, const ITensorInfo *dst, const PermutationVector &perm) { + if (prefer_copy(perm)) + { + return kernels::CpuCopyKernel::validate(src, dst); + } + + if (prefer_transpose(perm)) + { + return kernels::CpuTransposeKernel::validate(src, dst); + } + return kernels::CpuPermuteKernel::validate(src, dst, perm); } } // namespace cpu diff --git a/src/cpu/operators/CpuWinogradConv2d.cpp b/src/cpu/operators/CpuWinogradConv2d.cpp index 7d81aee0e9..7ed2f14ac5 100644 --- a/src/cpu/operators/CpuWinogradConv2d.cpp +++ b/src/cpu/operators/CpuWinogradConv2d.cpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2021-2023 Arm Limited. + * Copyright (c) 2021-2024 Arm Limited. * * SPDX-License-Identifier: MIT * @@ -309,7 +309,7 @@ void CpuWinogradConv2d::configure(const ITensorInfo *src, std::max(input_workspace_size, output_workspace_size)); _aux_mem[PermutedWeights] = MemoryInfo(offset_int_vec(PermutedWeights), MemoryLifetime::Prepare, _weights_hwio.total_size()); - _aux_mem[TransformedWeights] = MemoryInfo(offset_int_vec(TransformedWeights), MemoryLifetime::Persistent, + _aux_mem[TransformedWeights] = MemoryInfo(offset_int_vec(TransformedWeights), MemoryLifetime::Prepare, wds.weight_matrix_size_bytes, storage_alignment); if (_data_layout == DataLayout::NCHW) { diff --git a/src/cpu/operators/internal/CpuGemmAssemblyDispatch.cpp b/src/cpu/operators/internal/CpuGemmAssemblyDispatch.cpp index a4c856bb8f..fb9bc15212 100644 --- a/src/cpu/operators/internal/CpuGemmAssemblyDispatch.cpp +++ b/src/cpu/operators/internal/CpuGemmAssemblyDispatch.cpp @@ -531,8 +531,8 @@ void Fallback::configure(const ITensorInfo * const unsigned int alignment = 128; const size_t B_pretranspose_size = _gemm_kernel_asm->get_B_pretransposed_array_size(); _pretranspose_info = TensorInfo(TensorShape(B_pretranspose_size), 1, DataType::U8); - _aux_mem[Pretranspose] = - MemoryInfo(offset_int_vec(Pretranspose), MemoryLifetime::Persistent, B_pretranspose_size, 
alignment); + MemoryLifetime lifetime = _is_b_constant ? MemoryLifetime::Persistent : MemoryLifetime::Temporary; + _aux_mem[Pretranspose] = MemoryInfo(offset_int_vec(Pretranspose), lifetime, B_pretranspose_size, alignment); } // Handle indirect GEMM convolution diff --git a/src/gpu/cl/kernels/ClCropKernel.cpp b/src/gpu/cl/kernels/ClCropKernel.cpp index 0c503e13fc..6c5066779f 100644 --- a/src/gpu/cl/kernels/ClCropKernel.cpp +++ b/src/gpu/cl/kernels/ClCropKernel.cpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2019-2021 Arm Limited. + * Copyright (c) 2019-2021, 2024 Arm Limited. * * SPDX-License-Identifier: MIT * @@ -105,7 +105,6 @@ Status ClCropKernel::validate(const ITensorInfo *src, Window *dst_window) { ARM_COMPUTE_UNUSED(extrapolation_value, dst_window); - ARM_COMPUTE_RETURN_ERROR_ON_CPU_F16_UNSUPPORTED(src); ARM_COMPUTE_RETURN_ERROR_ON(src->data_type() == DataType::UNKNOWN); ARM_COMPUTE_RETURN_ERROR_ON_DATA_LAYOUT_NOT_IN(src, DataLayout::NHWC); ARM_COMPUTE_RETURN_ERROR_ON(src->tensor_shape().num_dimensions() > 4); diff --git a/src/runtime/CL/functions/CLLSTMLayer.cpp b/src/runtime/CL/functions/CLLSTMLayer.cpp index 3b50234c77..9338ef6249 100644 --- a/src/runtime/CL/functions/CLLSTMLayer.cpp +++ b/src/runtime/CL/functions/CLLSTMLayer.cpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2018-2021 Arm Limited. + * Copyright (c) 2018-2021, 2024 Arm Limited. 
* * SPDX-License-Identifier: MIT * @@ -96,7 +96,6 @@ CLLSTMLayer::CLLSTMLayer(std::shared_ptr memory_manager) _forget_gate_out3(), _forget_gate_out4(), _forget_gate_out5(), - _forget_gate_out6(), _cell_state_out1(), _cell_state_out2(), _cell_state_out3(), @@ -209,18 +208,17 @@ void CLLSTMLayer::configure(const CLCompileContext &compile_context, // forget_gate = Activation(input * input_to_forget_weights + output_state_in * recurrent_to_forget_weights + PixelWiseMul(cell_state, cell_to_forget_weights) + forget_gate_bias) // We optimize this as follows: // forget_gate = Activation( (input,output_state_in) * (input_to_forget_weights,recurrent_to_forget_weights) + PixelWiseMul(cell_state, cell_to_forget_weights) + forget_gate_bias - _forget_gate_out1.allocator()->init(TensorInfo(cell_state_shape, 1, input->info()->data_type())); - _forget_gate_out3.allocator()->init(TensorInfo(cell_state_shape, 1, input->info()->data_type())); - _forget_gate_out5.allocator()->init(TensorInfo(cell_state_shape, 1, input->info()->data_type())); + _forget_gate_out2.allocator()->init(TensorInfo(cell_state_shape, 1, input->info()->data_type())); + _forget_gate_out4.allocator()->init(TensorInfo(cell_state_shape, 1, input->info()->data_type())); std::vector inputs_vector; inputs_vector.emplace_back(input); inputs_vector.emplace_back(output_state_in); const TensorShape concat_shape = arm_compute::misc::shape_calculator::calculate_concatenate_shape(inputs_vector, 0); - _forget_gate_out2.allocator()->init(TensorInfo(concat_shape, 1, input->info()->data_type())); + _forget_gate_out1.allocator()->init(TensorInfo(concat_shape, 1, input->info()->data_type())); - _memory_group.manage(&_forget_gate_out2); - _concat_inputs_forget_gate.configure(compile_context, inputs_vector, &_forget_gate_out2, Window::DimX); + _memory_group.manage(&_forget_gate_out1); + _concat_inputs_forget_gate.configure(compile_context, inputs_vector, &_forget_gate_out1, Window::DimX); std::vector weights_vector; @@ -228,36 +226,35 
@@ void CLLSTMLayer::configure(const CLCompileContext &compile_context, weights_vector.emplace_back(recurrent_to_forget_weights); const TensorShape weights_concat_shape = arm_compute::misc::shape_calculator::calculate_concatenate_shape(weights_vector, 0); - _forget_gate_out6.allocator()->init(TensorInfo(weights_concat_shape, 1, input->info()->data_type())); + _forget_gate_out5.allocator()->init(TensorInfo(weights_concat_shape, 1, input->info()->data_type())); - _concat_weights_forget_gate.configure(compile_context, weights_vector, &_forget_gate_out6, Window::DimX); + _concat_weights_forget_gate.configure(compile_context, weights_vector, &_forget_gate_out5, Window::DimX); - _memory_group.manage(&_forget_gate_out5); - _fully_connected_forget_gate.configure(compile_context, &_forget_gate_out2, &_forget_gate_out6, - (_is_layer_norm_lstm) ? nullptr : forget_gate_bias, &_forget_gate_out5); - _memory_group.manage(&_forget_gate_out1); - _memory_group.manage(&_forget_gate_out3); - _forget_gate_out6.allocator()->allocate(); + _memory_group.manage(&_forget_gate_out4); + _fully_connected_forget_gate.configure(compile_context, &_forget_gate_out1, &_forget_gate_out5, + (_is_layer_norm_lstm) ? 
nullptr : forget_gate_bias, &_forget_gate_out4); + _memory_group.manage(&_forget_gate_out2); + _forget_gate_out5.allocator()->allocate(); - CLTensor *forget_gate_out = &_forget_gate_out5; + CLTensor *forget_gate_out = &_forget_gate_out4; if (lstm_params.has_peephole_opt()) { - _forget_gate_out4.allocator()->init(TensorInfo(cell_state_shape, 1, input->info()->data_type())); + _forget_gate_out3.allocator()->init(TensorInfo(cell_state_shape, 1, input->info()->data_type())); _run_peephole_opt = true; - _memory_group.manage(&_forget_gate_out4); + _memory_group.manage(&_forget_gate_out3); _pixelwise_mul_forget_gate.configure(compile_context, cell_state_in, lstm_params.cell_to_forget_weights(), - &_forget_gate_out4, 1, ConvertPolicy::SATURATE, + &_forget_gate_out3, 1, ConvertPolicy::SATURATE, RoundingPolicy::TO_NEAREST_EVEN); - _accum_forget_gate1.configure(compile_context, &_forget_gate_out5, &_forget_gate_out4, &_forget_gate_out3, + _accum_forget_gate1.configure(compile_context, &_forget_gate_out4, &_forget_gate_out3, &_forget_gate_out2, ConvertPolicy::SATURATE); + _forget_gate_out3.allocator()->allocate(); _forget_gate_out4.allocator()->allocate(); - _forget_gate_out5.allocator()->allocate(); - forget_gate_out = &_forget_gate_out3; + forget_gate_out = &_forget_gate_out2; } else { - _forget_gate_out3.allocator()->allocate(); + _forget_gate_out2.allocator()->allocate(); } if (_is_layer_norm_lstm) { @@ -313,7 +310,7 @@ void CLLSTMLayer::configure(const CLCompileContext &compile_context, _memory_group.manage(&_input_gate_out1); _memory_group.manage(&_input_gate_out3); - _fully_connected_input_gate.configure(compile_context, &_forget_gate_out2, &_input_gate_out2, + _fully_connected_input_gate.configure(compile_context, &_forget_gate_out1, &_input_gate_out2, (_is_layer_norm_lstm) ? 
nullptr : lstm_params.input_gate_bias(), &_input_gate_out3); _input_gate_out2.allocator()->allocate(); @@ -435,11 +432,11 @@ void CLLSTMLayer::configure(const CLCompileContext &compile_context, _memory_group.manage(&_output1); _memory_group.manage(&_output4); - _fully_connected_output.configure(compile_context, &_forget_gate_out2, &_output2, + _fully_connected_output.configure(compile_context, &_forget_gate_out1, &_output2, (_is_layer_norm_lstm) ? nullptr : output_gate_bias, &_output4); _output2.allocator()->allocate(); - _forget_gate_out2.allocator()->allocate(); + _forget_gate_out1.allocator()->allocate(); CLTensor *output_gate_out = &_output4; if (lstm_params.has_peephole_opt()) diff --git a/src/runtime/NEON/functions/NELSTMLayer.cpp b/src/runtime/NEON/functions/NELSTMLayer.cpp index 1a08cdeb06..92b4e26a91 100644 --- a/src/runtime/NEON/functions/NELSTMLayer.cpp +++ b/src/runtime/NEON/functions/NELSTMLayer.cpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2018-2022 Arm Limited. + * Copyright (c) 2018-2022, 2024 Arm Limited. 
* * SPDX-License-Identifier: MIT * @@ -95,7 +95,6 @@ NELSTMLayer::NELSTMLayer(std::shared_ptr memory_manager) _forget_gate_out3(), _forget_gate_out4(), _forget_gate_out5(), - _forget_gate_out6(), _cell_state_out1(), _cell_state_out2(), _cell_state_out3(), @@ -177,49 +176,48 @@ void NELSTMLayer::configure(const ITensor *input, // forget_gate = Activation(input * input_to_forget_weights + output_state_in * recurrent_to_forget_weights + PixelWiseMul(cell_state, cell_to_forget_weights) + forget_gate_bias) // We optimize this as follows: // forget_gate = Activation( (input,output_state_in) * (input_to_forget_weights,recurrent_to_forget_weights) + PixelWiseMul(cell_state, cell_to_forget_weights) + forget_gate_bias) - _forget_gate_out1.allocator()->init(TensorInfo(cell_state_shape, 1, input->info()->data_type())); - _forget_gate_out3.allocator()->init(TensorInfo(cell_state_shape, 1, input->info()->data_type())); - _forget_gate_out5.allocator()->init(TensorInfo(cell_state_shape, 1, input->info()->data_type())); + _forget_gate_out2.allocator()->init(TensorInfo(cell_state_shape, 1, input->info()->data_type())); + _forget_gate_out4.allocator()->init(TensorInfo(cell_state_shape, 1, input->info()->data_type())); std::vector inputs_vector; inputs_vector.emplace_back(input); inputs_vector.emplace_back(output_state_in); - _memory_group.manage(&_forget_gate_out2); - _concat_inputs_forget_gate.configure(inputs_vector, &_forget_gate_out2, Window::DimX); + _memory_group.manage(&_forget_gate_out1); + _concat_inputs_forget_gate.configure(inputs_vector, &_forget_gate_out1, Window::DimX); std::vector weights_vector; weights_vector.emplace_back(input_to_forget_weights); weights_vector.emplace_back(recurrent_to_forget_weights); - _concat_weights_forget_gate.configure(weights_vector, &_forget_gate_out6, Window::DimX); + _concat_weights_forget_gate.configure(weights_vector, &_forget_gate_out5, Window::DimX); - _memory_group.manage(&_forget_gate_out5); - 
_fully_connected_forget_gate.configure(&_forget_gate_out2, &_forget_gate_out6, - (_is_layer_norm_lstm) ? nullptr : forget_gate_bias, &_forget_gate_out5); - _memory_group.manage(&_forget_gate_out1); - _memory_group.manage(&_forget_gate_out3); - _forget_gate_out6.allocator()->allocate(); + _memory_group.manage(&_forget_gate_out4); + _fully_connected_forget_gate.configure(&_forget_gate_out1, &_forget_gate_out5, + (_is_layer_norm_lstm) ? nullptr : forget_gate_bias, &_forget_gate_out4); + + _memory_group.manage(&_forget_gate_out2); + _forget_gate_out5.allocator()->allocate(); - Tensor *forget_gate_out = &_forget_gate_out5; + Tensor *forget_gate_out = &_forget_gate_out4; if (lstm_params.has_peephole_opt()) { - _forget_gate_out4.allocator()->init(TensorInfo(cell_state_shape, 1, input->info()->data_type())); + _forget_gate_out3.allocator()->init(TensorInfo(cell_state_shape, 1, input->info()->data_type())); _run_peephole_opt = true; - _memory_group.manage(&_forget_gate_out4); - _pixelwise_mul_forget_gate.configure(cell_state_in, lstm_params.cell_to_forget_weights(), &_forget_gate_out4, 1, + _memory_group.manage(&_forget_gate_out3); + _pixelwise_mul_forget_gate.configure(cell_state_in, lstm_params.cell_to_forget_weights(), &_forget_gate_out3, 1, ConvertPolicy::SATURATE, RoundingPolicy::TO_ZERO); - _accum_forget_gate1.configure(&_forget_gate_out5, &_forget_gate_out4, &_forget_gate_out3, + _accum_forget_gate1.configure(&_forget_gate_out4, &_forget_gate_out3, &_forget_gate_out2, ConvertPolicy::SATURATE); + _forget_gate_out3.allocator()->allocate(); _forget_gate_out4.allocator()->allocate(); - _forget_gate_out5.allocator()->allocate(); - forget_gate_out = &_forget_gate_out3; + forget_gate_out = &_forget_gate_out2; } else { - _forget_gate_out3.allocator()->allocate(); + _forget_gate_out2.allocator()->allocate(); } if (_is_layer_norm_lstm) { @@ -268,9 +266,8 @@ void NELSTMLayer::configure(const ITensor *input, _concat_weights_input_gate.configure(lstm_weights, &_input_gate_out2, 
Window::DimX); _memory_group.manage(&_input_gate_out1); - _memory_group.manage(&_input_gate_out4); - _fully_connected_input_gate.configure(&_forget_gate_out2, &_input_gate_out2, + _fully_connected_input_gate.configure(&_forget_gate_out1, &_input_gate_out2, (_is_layer_norm_lstm) ? nullptr : lstm_params.input_gate_bias(), &_input_gate_out3); _input_gate_out2.allocator()->allocate(); @@ -384,11 +381,11 @@ void NELSTMLayer::configure(const ITensor *input, _memory_group.manage(&_output1); _memory_group.manage(&_output4); - _fully_connected_output.configure(&_forget_gate_out2, &_output2, (_is_layer_norm_lstm) ? nullptr : output_gate_bias, + _fully_connected_output.configure(&_forget_gate_out1, &_output2, (_is_layer_norm_lstm) ? nullptr : output_gate_bias, &_output4); _output2.allocator()->allocate(); - _forget_gate_out2.allocator()->allocate(); + _forget_gate_out1.allocator()->allocate(); Tensor *output_gate_out = &_output4; if (lstm_params.has_peephole_opt()) @@ -447,7 +444,6 @@ void NELSTMLayer::configure(const ITensor *input, _pixelwise_mul_output_state2.configure(&_cell_state_activation, output_gate_out, output_state_out_tmp, 1, ConvertPolicy::SATURATE, RoundingPolicy::TO_ZERO); _cell_state_activation.allocator()->allocate(); - output_gate_out->allocator()->allocate(); if (lstm_params.has_projection()) { diff --git a/src/runtime/experimental/operators/CpuActivation.cpp b/src/runtime/experimental/operators/CpuActivation.cpp new file mode 100644 index 0000000000..1f29b28315 --- /dev/null +++ b/src/runtime/experimental/operators/CpuActivation.cpp @@ -0,0 +1,65 @@ +/* + * Copyright (c) 2024 Arm Limited. 
+ * + * SPDX-License-Identifier: MIT + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal in the Software without restriction, including without limitation the + * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + * sell copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. 
+ */ + +#include "arm_compute/runtime/experimental/operators/CpuActivation.h" + +#include "src/cpu/operators/CpuActivation.h" + +namespace arm_compute +{ +namespace experimental +{ +namespace op +{ + +struct CpuActivation::Impl +{ + std::unique_ptr op{nullptr}; +}; + +CpuActivation::CpuActivation() : _impl(std::make_unique()) +{ + _impl->op = std::make_unique(); +} + +CpuActivation::~CpuActivation() = default; + +void CpuActivation::configure(const ITensorInfo *src, ITensorInfo *dst, const ActivationLayerInfo &act_info) +{ + _impl->op->configure(src, dst, act_info); +} + +Status CpuActivation::validate(const ITensorInfo *src, const ITensorInfo *dst, const ActivationLayerInfo &act_info) +{ + return cpu::CpuActivation::validate(src, dst, act_info); +} + +void CpuActivation::run(ITensorPack &tensors) +{ + _impl->op->run(tensors); +} + +} // namespace op +} // namespace experimental +} // namespace arm_compute diff --git a/src/runtime/experimental/operators/CpuGemm.cpp b/src/runtime/experimental/operators/CpuGemm.cpp new file mode 100644 index 0000000000..9111367d51 --- /dev/null +++ b/src/runtime/experimental/operators/CpuGemm.cpp @@ -0,0 +1,96 @@ +/* + * Copyright (c) 2024 Arm Limited. + * + * SPDX-License-Identifier: MIT + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal in the Software without restriction, including without limitation the + * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + * sell copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software. 
+ * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + */ + +#include "arm_compute/runtime/experimental/operators/CpuGemm.h" + +#include "src/cpu/operators/CpuGemm.h" + +namespace arm_compute +{ + +namespace experimental +{ +namespace ops +{ + +struct CpuGemm::Impl +{ + std::unique_ptr cpu_gemm{nullptr}; +}; + +CpuGemm::CpuGemm() : _impl(std::make_unique()) +{ + _impl->cpu_gemm = std::make_unique(); +} + +CpuGemm::~CpuGemm() = default; + +void CpuGemm::configure(const ITensorInfo *a, + const ITensorInfo *b, + const ITensorInfo *c, + ITensorInfo *d, + float alpha, + float beta, + const GEMMInfo &gemm_info) +{ + _impl->cpu_gemm->configure(a, b, c, d, alpha, beta, gemm_info); +} + +Status CpuGemm::validate(const ITensorInfo *a, + const ITensorInfo *b, + const ITensorInfo *c, + const ITensorInfo *d, + float alpha, + float beta, + const GEMMInfo &gemm_info) +{ + return cpu::CpuGemm::validate(a, b, c, d, alpha, beta, gemm_info); +} + +Status CpuGemm::has_opt_impl(arm_compute::WeightFormat &weight_format, + const ITensorInfo *a, + const ITensorInfo *b, + const ITensorInfo *c, + const ITensorInfo *d, + const GEMMInfo &gemm_info) +{ + return cpu::CpuGemm::has_opt_impl(weight_format, a, b, c, d, gemm_info); +} + +void CpuGemm::run(ITensorPack &tensors) +{ + _impl->cpu_gemm->run(tensors); +} +void CpuGemm::prepare(ITensorPack &constants) +{ + _impl->cpu_gemm->prepare(constants); +} +experimental::MemoryRequirements CpuGemm::workspace() const +{ + return _impl->cpu_gemm->workspace(); +} + +} // namespace ops +} // namespace experimental +} // 
namespace arm_compute diff --git a/src/runtime/experimental/operators/CpuGemmConv2d.cpp b/src/runtime/experimental/operators/CpuGemmConv2d.cpp new file mode 100644 index 0000000000..df461f5ed0 --- /dev/null +++ b/src/runtime/experimental/operators/CpuGemmConv2d.cpp @@ -0,0 +1,110 @@ +/* + * Copyright (c) 2024 Arm Limited. + * + * SPDX-License-Identifier: MIT + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal in the Software without restriction, including without limitation the + * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + * sell copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. 
+ */ + +#include "arm_compute/runtime/experimental/operators/CpuGemmConv2d.h" + +#include "src/cpu/operators/CpuGemmConv2d.h" + +namespace arm_compute +{ +namespace experimental +{ +namespace op +{ + +struct CpuGemmConv2d::Impl +{ + std::unique_ptr op{nullptr}; +}; + +CpuGemmConv2d::CpuGemmConv2d() : _impl(std::make_unique()) +{ + _impl->op = std::make_unique(); +} + +CpuGemmConv2d::~CpuGemmConv2d() = default; + +void CpuGemmConv2d::configure(const ITensorInfo *src, + const ITensorInfo *weights, + const ITensorInfo *biases, + ITensorInfo *dst, + const PadStrideInfo &conv_info, + const WeightsInfo &weights_info, + const Size2D &dilation, + const ActivationLayerInfo &act_info, + bool enable_fast_math, + unsigned int num_groups) +{ + _impl->op->configure(src, weights, biases, dst, conv_info, weights_info, dilation, act_info, enable_fast_math, + num_groups); +} + +Status CpuGemmConv2d::validate(const ITensorInfo *src, + const ITensorInfo *weights, + const ITensorInfo *biases, + const ITensorInfo *output, + const PadStrideInfo &conv_info, + const WeightsInfo &weights_info, + const Size2D &dilation, + const ActivationLayerInfo &act_info, + bool enable_fast_math, + unsigned int num_groups) +{ + return cpu::CpuGemmConv2d::validate(src, weights, biases, output, conv_info, weights_info, dilation, act_info, + enable_fast_math, num_groups); +} + +Status CpuGemmConv2d::has_opt_impl(arm_compute::WeightFormat &expected_weight_format, + const ITensorInfo *src, + const ITensorInfo *weights, + const ITensorInfo *biases, + const ITensorInfo *output, + const PadStrideInfo &conv_info, + const WeightsInfo &weights_info, + const Size2D &dilation, + const ActivationLayerInfo &act_info, + const bool enable_fast_math) +{ + return cpu::CpuGemmConv2d::has_opt_impl(expected_weight_format, src, weights, biases, output, conv_info, + weights_info, dilation, act_info, enable_fast_math); +} + +void CpuGemmConv2d::run(ITensorPack &tensors) +{ + _impl->op->run(tensors); +} + +void 
CpuGemmConv2d::prepare(ITensorPack &tensors) +{ + _impl->op->prepare(tensors); +} + +experimental::MemoryRequirements CpuGemmConv2d::workspace() const +{ + return _impl->op->workspace(); +} + +} // namespace op +} // namespace experimental +} // namespace arm_compute diff --git a/src/runtime/experimental/operators/CpuGemmDirectConv2d.cpp b/src/runtime/experimental/operators/CpuGemmDirectConv2d.cpp new file mode 100644 index 0000000000..4b15eaa942 --- /dev/null +++ b/src/runtime/experimental/operators/CpuGemmDirectConv2d.cpp @@ -0,0 +1,84 @@ +/* + * Copyright (c) 2024 Arm Limited. + * + * SPDX-License-Identifier: MIT + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal in the Software without restriction, including without limitation the + * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + * sell copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. 
+ */ + +#include "arm_compute/runtime/experimental/operators/CpuGemmDirectConv2d.h" + +#include "src/cpu/operators/CpuGemmDirectConv2d.h" + +namespace arm_compute +{ + +namespace experimental +{ +namespace op +{ + +struct CpuGemmDirectConv2d::Impl +{ + std::unique_ptr cpu_gemm{nullptr}; +}; + +CpuGemmDirectConv2d::CpuGemmDirectConv2d() : _impl(std::make_unique()) +{ + _impl->cpu_gemm = std::make_unique(); +} + +CpuGemmDirectConv2d::~CpuGemmDirectConv2d() = default; + +void CpuGemmDirectConv2d::configure(const ITensorInfo *src, + const ITensorInfo *weights, + const ITensorInfo *biases, + ITensorInfo *dst, + const Conv2dInfo &info) +{ + _impl->cpu_gemm->configure(src, weights, biases, dst, info); +} + +Status CpuGemmDirectConv2d::validate(const ITensorInfo *src, + const ITensorInfo *weights, + const ITensorInfo *biases, + const ITensorInfo *dst, + const Conv2dInfo &info) +{ + return cpu::CpuGemmDirectConv2d::validate(src, weights, biases, dst, info); +} + +void CpuGemmDirectConv2d::run(ITensorPack &tensors) +{ + _impl->cpu_gemm->run(tensors); +} + +void CpuGemmDirectConv2d::prepare(ITensorPack &constants) +{ + _impl->cpu_gemm->prepare(constants); +} + +experimental::MemoryRequirements CpuGemmDirectConv2d::workspace() const +{ + return _impl->cpu_gemm->workspace(); +} + +} // namespace op +} // namespace experimental +} // namespace arm_compute diff --git a/src/runtime/experimental/operators/CpuTranspose.cpp b/src/runtime/experimental/operators/CpuTranspose.cpp new file mode 100644 index 0000000000..d0a2043f24 --- /dev/null +++ b/src/runtime/experimental/operators/CpuTranspose.cpp @@ -0,0 +1,65 @@ +/* + * Copyright (c) 2024 Arm Limited. 
+ * + * SPDX-License-Identifier: MIT + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal in the Software without restriction, including without limitation the + * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + * sell copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. 
+ */ + +#include "arm_compute/runtime/experimental/operators/CpuTranspose.h" + +#include "src/cpu/operators/CpuTranspose.h" + +namespace arm_compute +{ +namespace experimental +{ +namespace op +{ + +struct CpuTranspose::Impl +{ + std::unique_ptr op{nullptr}; +}; + +CpuTranspose::CpuTranspose() : _impl(std::make_unique()) +{ + _impl->op = std::make_unique(); +} + +CpuTranspose::~CpuTranspose() = default; + +void CpuTranspose::configure(const ITensorInfo *src, ITensorInfo *dst) +{ + _impl->op->configure(src, dst); +} + +Status CpuTranspose::validate(const ITensorInfo *src, const ITensorInfo *dst) +{ + return cpu::CpuTranspose::validate(src, dst); +} + +void CpuTranspose::run(ITensorPack &tensors) +{ + _impl->op->run(tensors); +} + +} // namespace op +} // namespace experimental +} // namespace arm_compute diff --git a/src/runtime/experimental/operators/CpuWinogradConv2d.cpp b/src/runtime/experimental/operators/CpuWinogradConv2d.cpp new file mode 100644 index 0000000000..58b476197d --- /dev/null +++ b/src/runtime/experimental/operators/CpuWinogradConv2d.cpp @@ -0,0 +1,90 @@ +/* + * Copyright (c) 2024 Arm Limited. + * + * SPDX-License-Identifier: MIT + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal in the Software without restriction, including without limitation the + * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + * sell copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 
IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + */ +#include "src/cpu/operators/CpuWinogradConv2d.h" + +#include "arm_compute/core/Utils.h" +#include "arm_compute/runtime/experimental/operators/CpuWinogradConv2d.h" + +#include "support/Cast.h" + +namespace arm_compute +{ +namespace experimental +{ +namespace op +{ +using namespace arm_compute::experimental; +using namespace arm_compute::utils::cast; + +struct CpuWinogradConv2d::Impl +{ + std::unique_ptr op{nullptr}; +}; + +CpuWinogradConv2d::CpuWinogradConv2d() : _impl(std::make_unique()) +{ + _impl->op = std::make_unique(); +} + +CpuWinogradConv2d::~CpuWinogradConv2d() = default; + +void CpuWinogradConv2d::configure(const ITensorInfo *src, + const ITensorInfo *weights, + const ITensorInfo *biases, + ITensorInfo *dst, + const PadStrideInfo &conv_info, + const ActivationLayerInfo &act_info, + bool enable_fast_math) +{ + _impl->op->configure(src, weights, biases, dst, conv_info, act_info, enable_fast_math); +} +Status CpuWinogradConv2d::validate(const ITensorInfo *src, + const ITensorInfo *weights, + const ITensorInfo *biases, + const ITensorInfo *dst, + const PadStrideInfo &conv_info, + const ActivationLayerInfo &act_info, + bool enable_fast_math) +{ + return cpu::CpuWinogradConv2d::validate(src, weights, biases, dst, conv_info, act_info, enable_fast_math); +} + +void CpuWinogradConv2d::run(ITensorPack &tensors) +{ + _impl->op->run(tensors); +} + +void CpuWinogradConv2d::prepare(ITensorPack &tensors) +{ + _impl->op->prepare(tensors); +} + +experimental::MemoryRequirements CpuWinogradConv2d::workspace() const +{ + return _impl->op->workspace(); +} + +} // namespace op +} // namespace experimental +} // namespace arm_compute diff --git a/tests/BUILD.bazel b/tests/BUILD.bazel index 
5763938d3c..7085f1facc 100644 --- a/tests/BUILD.bazel +++ b/tests/BUILD.bazel @@ -1,4 +1,4 @@ -# Copyright (c) 2023 Arm Limited. +# Copyright (c) 2024 Arm Limited. # # SPDX-License-Identifier: MIT # @@ -72,6 +72,7 @@ cc_binary( "NEON/*.h", "validation/NEON/**/*.cpp", "validation/NEON/**/*.h", + "validation/runtime/experimental/**/*.cpp", "*.cpp", "datasets/*.h", "instruments/*.h", diff --git a/tests/SConscript b/tests/SConscript index 0907c5713b..9f8bb54dec 100644 --- a/tests/SConscript +++ b/tests/SConscript @@ -1,7 +1,7 @@ #!/usr/bin/python # -*- coding: utf-8 -*- -# Copyright (c) 2017-2023,2024 Arm Limited. +# Copyright (c) 2017-2023, 2024 Arm Limited. # # SPDX-License-Identifier: MIT # @@ -23,6 +23,7 @@ # OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE # SOFTWARE. import os.path +import SCons Import('env') Import('vars') @@ -156,6 +157,9 @@ if env['neon']: files_validation += Glob(env['external_tests_dir'] + '/tests/validation/NEON/' + filter_pattern) files_validation += Glob('validation/cpu/unit/*.cpp') + # Add wrapper tests + files_validation += Glob('validation/runtime/experimental/*/' + filter_pattern) + extra_link_flags = [] if env['os'] == 'android': test_env.Append(LIBS = ["log"]) @@ -187,7 +191,29 @@ if env['fixed_format_kernels'] and test_env['validation_tests']: test_env.Append(CPPDEFINES = ['ARM_COMPUTE_ENABLE_FIXED_FORMAT_KERNELS']) if test_env['validation_tests']: - arm_compute_validation_framework = env.StaticLibrary('arm_compute_validation_framework', Glob('validation/reference/*.cpp') + Glob('validation/*.cpp'), LINKFLAGS=test_env['LINKFLAGS'], CXXFLAGS=test_env['CXXFLAGS'], LIBS= [ arm_compute_test_framework ]) + #The following set up only works for posix system, RANLIBCOM env variable isn't available on win32 HOST_OS + if test_env['HOST_OS'] == 'posix': + #Set up to use temp file for long command when building and linking libraries + test_env['TEMPFILE'] = SCons.Platform.TempFileMunge + + #To use temp file for any 
command, the following pattern should be used: + # env['COMMAND'] = "{$TEMPFILE('$COMMANDSTRING')}" + #See: https://github.com/SCons/scons/blob/05f2992377844bbfec9bcd4a9c7f5479c634b91b/SCons/Platform/__init__.py#L147 + #The commands' string are taken from https://github.com/SCons/scons + #The commands' explanations are taken from Scons userguide + + #The command line used to compile C++ source file to an object files + test_env['CXXCOM'] = "${TEMPFILE('"+ test_env['CXXCOM'] + "')}" + #The command line used to generate a static library from object files + test_env['ARCOM'] = "${TEMPFILE('"+ test_env['ARCOM'] + "')}" + #The command line used to index a static library archive + test_env['RANLIBCOM'] = "${TEMPFILE('"+ test_env['RANLIBCOM'] + "')}" + #The command line used to link object files into an executable + test_env['LINKCOM'] = "${TEMPFILE('"+ test_env['LINKCOM'] + "')}" + #Set up directory for temp files. To prevent permission issue, the temp files are in the same directory with output files + test_env['TEMPFILEDIR'] = test_env['build_dir'] + + arm_compute_validation_framework = test_env.StaticLibrary('arm_compute_validation_framework', Glob('validation/reference/*.cpp') + Glob('validation/*.cpp'), LINKFLAGS=test_env['LINKFLAGS'], CXXFLAGS=test_env['CXXFLAGS'], LIBS= [ arm_compute_test_framework ]) Depends(arm_compute_validation_framework , arm_compute_test_framework) program_objects = files_validation + common_objects diff --git a/tests/datasets/ShapeDatasets.h b/tests/datasets/ShapeDatasets.h index c1e61444a8..396d2efede 100644 --- a/tests/datasets/ShapeDatasets.h +++ b/tests/datasets/ShapeDatasets.h @@ -1,5 +1,5 @@ /* - * Copyright (c) 2017-2023 Arm Limited. + * Copyright (c) 2017-2024 Arm Limited. * * SPDX-License-Identifier: MIT * @@ -21,8 +21,8 @@ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE * SOFTWARE. 
*/ -#ifndef ARM_COMPUTE_TEST_SHAPE_DATASETS_H -#define ARM_COMPUTE_TEST_SHAPE_DATASETS_H +#ifndef ACL_TESTS_DATASETS_SHAPEDATASETS_H +#define ACL_TESTS_DATASETS_SHAPEDATASETS_H #include "arm_compute/core/TensorShape.h" #include "tests/framework/datasets/Datasets.h" @@ -1194,7 +1194,21 @@ class Large2DNonMaxSuppressionShapes final : public ShapeDataset } }; +/** Data set containing large 2D tensor shapes. */ +class Large2DMeanStdDevNormalizationShapes final : public ShapeDataset +{ +public: + Large2DMeanStdDevNormalizationShapes() + : ShapeDataset("Shape", + { + TensorShape{ 1245U, 652U }, + TensorShape{ 1048576U, 32U } + }) + { + } +}; + } // namespace datasets } // namespace test } // namespace arm_compute -#endif /* ARM_COMPUTE_TEST_SHAPE_DATASETS_H */ +#endif // ACL_TESTS_DATASETS_SHAPEDATASETS_H diff --git a/tests/framework/Macros.h b/tests/framework/Macros.h index 5ce0842864..09e01b0b0c 100644 --- a/tests/framework/Macros.h +++ b/tests/framework/Macros.h @@ -1,5 +1,5 @@ /* - * Copyright (c) 2017-2021, 2023 Arm Limited. + * Copyright (c) 2017-2021, 2023-2024 Arm Limited. * * SPDX-License-Identifier: MIT * @@ -21,8 +21,8 @@ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE * SOFTWARE. 
*/ -#ifndef ARM_COMPUTE_TEST_FRAMEWORK_MACROS -#define ARM_COMPUTE_TEST_FRAMEWORK_MACROS +#ifndef ACL_TESTS_FRAMEWORK_MACROS_H +#define ACL_TESTS_FRAMEWORK_MACROS_H #include "Framework.h" #include "Registrars.h" @@ -133,12 +133,16 @@ void do_setup() override \ { \ framework::Framework::get().set_new_fixture_call(true); \ - apply(this, &FIXTURE::setup, _data); \ - configure_target(); \ - if(!framework::Framework::get().configure_only()) \ + apply(this, &FIXTURE::setup, _data); \ + \ + if(!_skip_test) \ { \ - allocate_and_run_target(); \ - compute_reference(); \ + configure_target(); \ + if(!framework::Framework::get().configure_only()) \ + { \ + allocate_and_run_target(); \ + compute_reference(); \ + } \ } \ } #define FIXTURE_RUN(FIXTURE) \ @@ -324,4 +328,4 @@ // // TEST CASE MACROS END // -#endif /* ARM_COMPUTE_TEST_FRAMEWORK_MACROS */ +#endif // ACL_TESTS_FRAMEWORK_MACROS_H diff --git a/tests/framework/SConscript b/tests/framework/SConscript index 450ffd77b0..cca5169099 100644 --- a/tests/framework/SConscript +++ b/tests/framework/SConscript @@ -1,7 +1,7 @@ #!/usr/bin/python # -*- coding: utf-8 -*- -# Copyright (c) 2017-2022 Arm Limited. +# Copyright (c) 2017-2022, 2024 Arm Limited. 
# # SPDX-License-Identifier: MIT # @@ -75,6 +75,26 @@ if not framework_env['mali']: else: framework_env.Append(CPPDEFINES = ['MALI_ENABLED']) +#The following setup only works on posix systems; the RANLIBCOM env variable isn't available when HOST_OS is win32 +if framework_env['HOST_OS'] == 'posix': + #Use temp files to pass long command lines when building and linking libraries + framework_env['TEMPFILE'] = SCons.Platform.TempFileMunge + + #To use a temp file for any command, the following pattern should be used: + # env['COMMAND'] = "${TEMPFILE('$COMMANDSTRING')}" + #See: https://github.com/SCons/scons/blob/05f2992377844bbfec9bcd4a9c7f5479c634b91b/SCons/Platform/__init__.py#L147 + #The command strings are taken from https://github.com/SCons/scons + #The command explanations are taken from the SCons user guide + + #The command line used to compile a C++ source file to an object file + framework_env['CXXCOM'] = "${TEMPFILE('"+ framework_env['CXXCOM'] + "')}" + #The command line used to generate a static library from object files + framework_env['ARCOM'] = "${TEMPFILE('"+ framework_env['ARCOM'] + "')}" + #The command line used to index a static library archive + framework_env['RANLIBCOM'] = "${TEMPFILE('"+ framework_env['RANLIBCOM'] + "')}" + #Set up the directory for temp files. To prevent permission issues, the temp files are placed in the same directory as the output files + framework_env['TEMPFILEDIR'] = framework_env['build_dir'] + arm_compute_test_framework = framework_env.StaticLibrary('arm_compute_test_framework', files) Default(arm_compute_test_framework) diff --git a/tests/validation/CL/CropResize.cpp b/tests/validation/CL/CropResize.cpp index f1fae3d5cc..b361cfdd91 100644 --- a/tests/validation/CL/CropResize.cpp +++ b/tests/validation/CL/CropResize.cpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2019-2020 Arm Limited. + * Copyright (c) 2019-2020, 2024 Arm Limited.
* * SPDX-License-Identifier: MIT * @@ -91,7 +91,6 @@ DATA_TEST_CASE(Validate, framework::DatasetMode::ALL, zip(zip(zip(zip( // *INDENT-ON* TEST_SUITE(Float) -#ifdef __ARM_FEATURE_FP16_VECTOR_ARITHMETIC TEST_SUITE(F16) FIXTURE_DATA_TEST_CASE(RunSmall, CLCropResizeFixture, @@ -104,7 +103,6 @@ FIXTURE_DATA_TEST_CASE(RunSmall, validate(CLAccessor(_target), _reference, tolerance_fp32, 0.01); } TEST_SUITE_END() // F16 -#endif /* __ARM_FEATURE_FP16_VECTOR_ARITHMETIC */ TEST_SUITE(F32) FIXTURE_DATA_TEST_CASE(RunSmall, diff --git a/tests/validation/CL/LSTMLayer.cpp b/tests/validation/CL/LSTMLayer.cpp index a550613b0c..02aef1f2af 100644 --- a/tests/validation/CL/LSTMLayer.cpp +++ b/tests/validation/CL/LSTMLayer.cpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2018-2020 Arm Limited. + * Copyright (c) 2018-2020, 2024 Arm Limited. * * SPDX-License-Identifier: MIT * @@ -43,95 +43,106 @@ RelativeTolerance tolerance_f32(0.001f); RelativeTolerance tolerance_f16(half(0.1)); } // namespace +using framework::dataset::make; + TEST_SUITE(CL) TEST_SUITE(LSTMLayer) // *INDENT-OFF* // clang-format off -DATA_TEST_CASE(Validate, framework::DatasetMode::ALL, zip(zip(zip(zip(zip(zip(zip(zip(zip( - framework::dataset::make("InputInfo", { TensorInfo(TensorShape(8U, 2U), 1, DataType::U8), // Wrong data type - TensorInfo(TensorShape(27U, 13U, 2U), 1, DataType::F32), // Wrong input size - TensorInfo(TensorShape(8U, 2U), 1, DataType::F32), // Wrong input weights size - TensorInfo(TensorShape(8U, 2U), 1, DataType::F32), // Wrong recurrent weights size - TensorInfo(TensorShape(8U, 2U), 1, DataType::F32), // Wrong cell bias size - TensorInfo(TensorShape(8U, 2U), 1, DataType::F32), // Wrong cell state size - TensorInfo(TensorShape(8U, 2U), 1, DataType::F32), // Wrong output size - TensorInfo(TensorShape(8U, 2U), 1, DataType::F32), // Wrong scratch size - }), - framework::dataset::make("InputWeightsInfo", { TensorInfo(TensorShape(8U, 16U), 1, DataType::F32), - TensorInfo(TensorShape(8U, 16U), 1, DataType::F32), 
- TensorInfo(TensorShape(27U, 11U, 2U), 1, DataType::F32), - TensorInfo(TensorShape(8U, 16U), 1, DataType::F32), - TensorInfo(TensorShape(8U, 16U), 1, DataType::F32), - TensorInfo(TensorShape(8U, 16U), 1, DataType::F32), - TensorInfo(TensorShape(8U, 16U), 1, DataType::F32), - TensorInfo(TensorShape(8U, 16U), 1, DataType::F32), - })), - framework::dataset::make("RecurrentWeightsInfo", { TensorInfo(TensorShape(16U, 16U), 1, DataType::F32), - TensorInfo(TensorShape(16U, 16U), 1, DataType::F32), - TensorInfo(TensorShape(16U, 16U), 1, DataType::F32), - TensorInfo(TensorShape(25U, 11U, 2U), 1, DataType::F32), - TensorInfo(TensorShape(16U, 16U), 1, DataType::F32), - TensorInfo(TensorShape(16U, 16U), 1, DataType::F32), - TensorInfo(TensorShape(16U, 16U), 1, DataType::F32), - TensorInfo(TensorShape(16U, 16U), 1, DataType::F32), - })), - framework::dataset::make("CellBiasInfo", { TensorInfo(TensorShape(16U), 1, DataType::F32), - TensorInfo(TensorShape(16U), 1, DataType::F32), - TensorInfo(TensorShape(16U), 1, DataType::F32), - TensorInfo(TensorShape(16U), 1, DataType::F32), - TensorInfo(TensorShape(30U), 1, DataType::F32), - TensorInfo(TensorShape(16U), 1, DataType::F32), - TensorInfo(TensorShape(16U), 1, DataType::F32), - TensorInfo(TensorShape(16U), 1, DataType::F32), - })), - framework::dataset::make("ProjectionBiasInfo", { TensorInfo(TensorShape(16U), 1, DataType::F32), - TensorInfo(TensorShape(16U), 1, DataType::F32), - TensorInfo(TensorShape(16U), 1, DataType::F32), - TensorInfo(TensorShape(16U), 1, DataType::F32), - TensorInfo(TensorShape(16U), 1, DataType::F32), - TensorInfo(TensorShape(16U), 1, DataType::F32), - TensorInfo(TensorShape(16U), 1, DataType::F32), - TensorInfo(TensorShape(16U), 1, DataType::F32), - })), - framework::dataset::make("CellStateInfo", { TensorInfo(TensorShape(16U, 2U), 1, DataType::F32), - TensorInfo(TensorShape(16U, 2U), 1, DataType::F32), - TensorInfo(TensorShape(16U, 2U), 1, DataType::F32), - TensorInfo(TensorShape(16U, 2U), 1, 
DataType::F32), - TensorInfo(TensorShape(16U, 2U), 1, DataType::F32), - TensorInfo(TensorShape(11U), 1, DataType::F32), - TensorInfo(TensorShape(16U, 2U), 1, DataType::F32), - TensorInfo(TensorShape(16U, 2U), 1, DataType::F32), - })), - framework::dataset::make("OutputInfo", { TensorInfo(TensorShape(16U, 2U), 1, DataType::F32), - TensorInfo(TensorShape(16U, 2U), 1, DataType::F32), - TensorInfo(TensorShape(16U, 2U), 1, DataType::F32), - TensorInfo(TensorShape(16U, 2U), 1, DataType::F32), - TensorInfo(TensorShape(16U, 2U), 1, DataType::F32), - TensorInfo(TensorShape(16U, 2U), 1, DataType::F32), - TensorInfo(TensorShape(11U, 2U), 1, DataType::F32), - TensorInfo(TensorShape(16U, 2U), 1, DataType::F32), - })), - framework::dataset::make("ScratchInfo", { TensorInfo(TensorShape(64U, 2U), 1, DataType::F32), - TensorInfo(TensorShape(64U, 2U), 1, DataType::F32), - TensorInfo(TensorShape(64U, 2U), 1, DataType::F32), - TensorInfo(TensorShape(64U, 2U), 1, DataType::F32), - TensorInfo(TensorShape(64U, 2U), 1, DataType::F32), - TensorInfo(TensorShape(64U, 2U), 1, DataType::F32), - TensorInfo(TensorShape(64U, 2U), 1, DataType::F32), - TensorInfo(TensorShape(12U, 2U), 1, DataType::F32), - })), - framework::dataset::make("ActivationInfo", { ActivationLayerInfo(ActivationLayerInfo::ActivationFunction::RELU), - ActivationLayerInfo(ActivationLayerInfo::ActivationFunction::RELU), - ActivationLayerInfo(ActivationLayerInfo::ActivationFunction::RELU), - ActivationLayerInfo(ActivationLayerInfo::ActivationFunction::RELU), - ActivationLayerInfo(ActivationLayerInfo::ActivationFunction::RELU), - ActivationLayerInfo(ActivationLayerInfo::ActivationFunction::RELU), - ActivationLayerInfo(ActivationLayerInfo::ActivationFunction::RELU), - ActivationLayerInfo(ActivationLayerInfo::ActivationFunction::RELU), - })), - framework::dataset::make("Expected", { false, false, false, false, false, false, false, false })), - input_info, input_weights_info, recurrent_weights_info, cell_bias_info, 
projection_bias_info, cell_state_info, output_info, scratch_info, info, expected) +DATA_TEST_CASE(Validate, framework::DatasetMode::ALL, zip( + make("InputInfo", { + TensorInfo(TensorShape(8U, 2U), 1, DataType::U8), // Wrong data type + TensorInfo(TensorShape(27U, 13U, 2U), 1, DataType::F32), // Wrong input size + TensorInfo(TensorShape(8U, 2U), 1, DataType::F32), // Wrong input weights size + TensorInfo(TensorShape(8U, 2U), 1, DataType::F32), // Wrong recurrent weights size + TensorInfo(TensorShape(8U, 2U), 1, DataType::F32), // Wrong cell bias size + TensorInfo(TensorShape(8U, 2U), 1, DataType::F32), // Wrong cell state size + TensorInfo(TensorShape(8U, 2U), 1, DataType::F32), // Wrong output size + TensorInfo(TensorShape(8U, 2U), 1, DataType::F32), // Wrong scratch size + }), + make("InputWeightsInfo", { + TensorInfo(TensorShape(8U, 16U), 1, DataType::F32), + TensorInfo(TensorShape(8U, 16U), 1, DataType::F32), + TensorInfo(TensorShape(27U, 11U, 2U), 1, DataType::F32), + TensorInfo(TensorShape(8U, 16U), 1, DataType::F32), + TensorInfo(TensorShape(8U, 16U), 1, DataType::F32), + TensorInfo(TensorShape(8U, 16U), 1, DataType::F32), + TensorInfo(TensorShape(8U, 16U), 1, DataType::F32), + TensorInfo(TensorShape(8U, 16U), 1, DataType::F32), + }), + make("RecurrentWeightsInfo", { + TensorInfo(TensorShape(16U, 16U), 1, DataType::F32), + TensorInfo(TensorShape(16U, 16U), 1, DataType::F32), + TensorInfo(TensorShape(16U, 16U), 1, DataType::F32), + TensorInfo(TensorShape(25U, 11U, 2U), 1, DataType::F32), + TensorInfo(TensorShape(16U, 16U), 1, DataType::F32), + TensorInfo(TensorShape(16U, 16U), 1, DataType::F32), + TensorInfo(TensorShape(16U, 16U), 1, DataType::F32), + TensorInfo(TensorShape(16U, 16U), 1, DataType::F32), + }), + make("CellBiasInfo", { + TensorInfo(TensorShape(16U), 1, DataType::F32), + TensorInfo(TensorShape(16U), 1, DataType::F32), + TensorInfo(TensorShape(16U), 1, DataType::F32), + TensorInfo(TensorShape(16U), 1, DataType::F32), + 
TensorInfo(TensorShape(30U), 1, DataType::F32), + TensorInfo(TensorShape(16U), 1, DataType::F32), + TensorInfo(TensorShape(16U), 1, DataType::F32), + TensorInfo(TensorShape(16U), 1, DataType::F32), + }), + make("ProjectionBiasInfo", { + TensorInfo(TensorShape(16U), 1, DataType::F32), + TensorInfo(TensorShape(16U), 1, DataType::F32), + TensorInfo(TensorShape(16U), 1, DataType::F32), + TensorInfo(TensorShape(16U), 1, DataType::F32), + TensorInfo(TensorShape(16U), 1, DataType::F32), + TensorInfo(TensorShape(16U), 1, DataType::F32), + TensorInfo(TensorShape(16U), 1, DataType::F32), + TensorInfo(TensorShape(16U), 1, DataType::F32), + }), + make("CellStateInfo", { + TensorInfo(TensorShape(16U, 2U), 1, DataType::F32), + TensorInfo(TensorShape(16U, 2U), 1, DataType::F32), + TensorInfo(TensorShape(16U, 2U), 1, DataType::F32), + TensorInfo(TensorShape(16U, 2U), 1, DataType::F32), + TensorInfo(TensorShape(16U, 2U), 1, DataType::F32), + TensorInfo(TensorShape(11U), 1, DataType::F32), + TensorInfo(TensorShape(16U, 2U), 1, DataType::F32), + TensorInfo(TensorShape(16U, 2U), 1, DataType::F32), + }), + make("OutputInfo", { + TensorInfo(TensorShape(16U, 2U), 1, DataType::F32), + TensorInfo(TensorShape(16U, 2U), 1, DataType::F32), + TensorInfo(TensorShape(16U, 2U), 1, DataType::F32), + TensorInfo(TensorShape(16U, 2U), 1, DataType::F32), + TensorInfo(TensorShape(16U, 2U), 1, DataType::F32), + TensorInfo(TensorShape(16U, 2U), 1, DataType::F32), + TensorInfo(TensorShape(11U, 2U), 1, DataType::F32), + TensorInfo(TensorShape(16U, 2U), 1, DataType::F32), + }), + make("ScratchInfo", { + TensorInfo(TensorShape(64U, 2U), 1, DataType::F32), + TensorInfo(TensorShape(64U, 2U), 1, DataType::F32), + TensorInfo(TensorShape(64U, 2U), 1, DataType::F32), + TensorInfo(TensorShape(64U, 2U), 1, DataType::F32), + TensorInfo(TensorShape(64U, 2U), 1, DataType::F32), + TensorInfo(TensorShape(64U, 2U), 1, DataType::F32), + TensorInfo(TensorShape(64U, 2U), 1, DataType::F32), + TensorInfo(TensorShape(12U, 2U), 
1, DataType::F32), + }), + make("ActivationInfo", { + ActivationLayerInfo(ActivationLayerInfo::ActivationFunction::RELU), + ActivationLayerInfo(ActivationLayerInfo::ActivationFunction::RELU), + ActivationLayerInfo(ActivationLayerInfo::ActivationFunction::RELU), + ActivationLayerInfo(ActivationLayerInfo::ActivationFunction::RELU), + ActivationLayerInfo(ActivationLayerInfo::ActivationFunction::RELU), + ActivationLayerInfo(ActivationLayerInfo::ActivationFunction::RELU), + ActivationLayerInfo(ActivationLayerInfo::ActivationFunction::RELU), + ActivationLayerInfo(ActivationLayerInfo::ActivationFunction::RELU), + }), + make("Expected", { false, false, false, false, false, false, false, false })), + input_info, input_weights_info, recurrent_weights_info, cell_bias_info, projection_bias_info, cell_state_info, output_info, scratch_info, info, expected) { LSTMParams lstm_params_info; auto cell_bias_clone = cell_bias_info.clone(); @@ -154,11 +165,14 @@ template using CLLSTMLayerFixture = LSTMLayerValidationFixture, T>; TEST_SUITE(FP32) -FIXTURE_DATA_TEST_CASE(RunSmall, CLLSTMLayerFixture, framework::DatasetMode::ALL, combine(combine(combine(combine(datasets::SmallLSTMLayerDataset(), framework::dataset::make("DataType", - DataType::F32)), - framework::dataset::make("ProjectionOpt", { true, false })), - framework::dataset::make("PeepholeOpt", { true, false })), - framework::dataset::make("UseLayerNorm", { true, false }))) +FIXTURE_DATA_TEST_CASE(RunSmall, CLLSTMLayerFixture, framework::DatasetMode::ALL, + combine( + datasets::SmallLSTMLayerDataset(), + make("DataType", DataType::F32), + make("ProjectionOpt", { true, false }), + make("PeepholeOpt", { true, false }), + make("UseLayerNorm", { true, false }), + make("UseMemoryManager", { true, false }))) { // Validate output validate(CLAccessor(_target), _reference, tolerance_f32); @@ -167,11 +181,14 @@ FIXTURE_DATA_TEST_CASE(RunSmall, CLLSTMLayerFixture, framework::DatasetMo TEST_SUITE_END() // FP32 TEST_SUITE(FP16) 
-FIXTURE_DATA_TEST_CASE(RunSmall, CLLSTMLayerFixture, framework::DatasetMode::ALL, combine(combine(combine(combine(datasets::SmallLSTMLayerDataset(), framework::dataset::make("DataType", - DataType::F16)), - framework::dataset::make("ProjectionOpt", { true, false })), - framework::dataset::make("PeepholeOpt", { true, false })), - framework::dataset::make("UseLayerNorm", { true, false }))) +FIXTURE_DATA_TEST_CASE(RunSmall, CLLSTMLayerFixture, framework::DatasetMode::ALL, + combine( + datasets::SmallLSTMLayerDataset(), + make("DataType", DataType::F16), + make("ProjectionOpt", { true, false }), + make("PeepholeOpt", { true, false }), + make("UseLayerNorm", { true, false }), + make("UseMemoryManager", { true, false }))) { // Validate output validate(CLAccessor(_target), _reference, tolerance_f16); diff --git a/tests/validation/CL/MeanStdDevNormalizationLayer.cpp b/tests/validation/CL/MeanStdDevNormalizationLayer.cpp index cdeb622130..6f9dd2e2d6 100644 --- a/tests/validation/CL/MeanStdDevNormalizationLayer.cpp +++ b/tests/validation/CL/MeanStdDevNormalizationLayer.cpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2019-2021 Arm Limited. + * Copyright (c) 2019-2021, 2024 Arm Limited. 
* * SPDX-License-Identifier: MIT * @@ -45,7 +45,7 @@ namespace { /** Tolerance for float operations */ RelativeTolerance tolerance_f16(half(0.2f)); -RelativeTolerance tolerance_f32(1e-8f); +RelativeTolerance tolerance_f32(0.001f); } // namespace TEST_SUITE(CL) @@ -83,7 +83,7 @@ FIXTURE_DATA_TEST_CASE(RunSmall, CLMeanStdDevNormalizationLayerFixture, fr // Validate output validate(CLAccessor(_target), _reference, tolerance_f16); } -FIXTURE_DATA_TEST_CASE(RunLarge, CLMeanStdDevNormalizationLayerFixture, framework::DatasetMode::NIGHTLY, combine(combine(combine(datasets::Large2DShapes(), +FIXTURE_DATA_TEST_CASE(RunLarge, CLMeanStdDevNormalizationLayerFixture, framework::DatasetMode::NIGHTLY, combine(combine(combine(datasets::Large2DMeanStdDevNormalizationShapes(), framework::dataset::make("DataType", DataType::F16)), framework::dataset::make("InPlace", { false, true })), framework::dataset::make("Epsilon", { 1e-8 }))) @@ -102,7 +102,7 @@ FIXTURE_DATA_TEST_CASE(RunSmall, CLMeanStdDevNormalizationLayerFixture, f // Validate output validate(CLAccessor(_target), _reference, tolerance_f32); } -FIXTURE_DATA_TEST_CASE(RunLarge, CLMeanStdDevNormalizationLayerFixture, framework::DatasetMode::NIGHTLY, combine(combine(combine(datasets::Large2DShapes(), +FIXTURE_DATA_TEST_CASE(RunLarge, CLMeanStdDevNormalizationLayerFixture, framework::DatasetMode::NIGHTLY, combine(combine(combine(datasets::Large2DMeanStdDevNormalizationShapes(), framework::dataset::make("DataType", DataType::F32)), framework::dataset::make("InPlace", { false, true })), framework::dataset::make("Epsilon", { 1e-8 }))) diff --git a/tests/validation/CMakeLists.txt b/tests/validation/CMakeLists.txt index 448e96c4f9..32ec9c2e44 100644 --- a/tests/validation/CMakeLists.txt +++ b/tests/validation/CMakeLists.txt @@ -1,4 +1,4 @@ -# Copyright (c) 2023 Arm Limited. +# Copyright (c) 2024 Arm Limited. 
# # SPDX-License-Identifier: MIT # @@ -142,5 +142,11 @@ if(ENABLE_NEON) NEON/UNIT/DynamicTensor.cpp NEON/UNIT/TensorAllocator.cpp NEON/UNIT/MemoryManager.cpp - NEON/UNIT/RuntimeContext.cpp) + NEON/UNIT/RuntimeContext.cpp + runtime/experimental/operators/CpuActivation.cpp + runtime/experimental/operators/CpuGemm.cpp + runtime/experimental/operators/CpuGemmConv2d.cpp + runtime/experimental/operators/CpuGemmDirectConv2d.cpp + runtime/experimental/operators/CpuTranspose.cpp + runtime/experimental/operators/CpuWinogradConv2d.cpp) endif() diff --git a/tests/validation/CPP/Permute.cpp b/tests/validation/CPP/Permute.cpp index 9495fa738e..a128006890 100644 --- a/tests/validation/CPP/Permute.cpp +++ b/tests/validation/CPP/Permute.cpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2017-2020 Arm Limited. + * Copyright (c) 2017-2020, 2024 Arm Limited. * * SPDX-License-Identifier: MIT * @@ -120,6 +120,24 @@ FIXTURE_DATA_TEST_CASE(RunSmall, CPPPermuteFixture, framework::DatasetMo TEST_SUITE_END() // QASYMM8_SINGED +#ifdef ARM_COMPUTE_ENABLE_FP16 +TEST_SUITE(F16) +FIXTURE_DATA_TEST_CASE(RunSmall, CPPPermuteFixture, framework::DatasetMode::PRECOMMIT, + PermuteParametersSmall * framework::dataset::make("DataType", DataType::F16)) +{ + // Validate output + validate(Accessor(_target), _reference); +} + +FIXTURE_DATA_TEST_CASE(RunLarge, CPPPermuteFixture, framework::DatasetMode::NIGHTLY, + PermuteParametersLarge * framework::dataset::make("DataType", DataType::F16)) +{ + // Validate output + validate(Accessor(_target), _reference); +} +TEST_SUITE_END() // F16 +#endif /* ARM_COMPUTE_ENABLE_FP16 */ + TEST_SUITE_END() TEST_SUITE_END() } // namespace validation diff --git a/tests/validation/NEON/ActivationLayer.cpp b/tests/validation/NEON/ActivationLayer.cpp index 73f5de68ac..39be3ebfd7 100644 --- a/tests/validation/NEON/ActivationLayer.cpp +++ b/tests/validation/NEON/ActivationLayer.cpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2017-2023 Arm Limited. + * Copyright (c) 2017-2024 Arm Limited.
* * SPDX-License-Identifier: MIT * @@ -39,7 +39,6 @@ #include "tests/framework/datasets/Datasets.h" #include "tests/validation/Validation.h" #include "tests/validation/fixtures/ActivationLayerFixture.h" - #include "arm_compute/Acl.hpp" #include "support/AclRequires.h" @@ -167,15 +166,6 @@ AbsoluteTolerance tolerance_qasymm8(ActivationLayerInfo::ActivationFunc constexpr AbsoluteTolerance tolerance_qsymm16(1); -/** CNN data types */ -const auto CNNDataTypes = framework::dataset::make("DataType", -{ -#ifdef __ARM_FEATURE_FP16_VECTOR_ARITHMETIC - DataType::F16, -#endif /* __ARM_FEATURE_FP16_VECTOR_ARITHMETIC */ - DataType::F32, -}); - const auto NeonActivationFunctionsDataset = concat(datasets::ActivationFunctions(), framework::dataset::make("ActivationFunction", { ActivationLayerInfo::ActivationFunction::HARD_SWISH, ActivationLayerInfo::ActivationFunction::SWISH })); @@ -188,9 +178,17 @@ void test_float_sqrt_boundary_value() constexpr auto vector_size = uint32_t{ 16 }; auto data_type = DataType::F32; -#ifdef __ARM_FEATURE_FP16_VECTOR_ARITHMETIC +#ifdef ARM_COMPUTE_ENABLE_FP16 data_type = std::is_same::value ? DataType::F16 : data_type; -#endif /* __ARM_FEATURE_FP16_VECTOR_ARITHMETIC */ +#endif /* ARM_COMPUTE_ENABLE_FP16 */ + + if(data_type == DataType::F16 && !CPUInfo::get().has_fp16()) + { + ARM_COMPUTE_TEST_INFO("Device does not support fp16 vector operations. 
Test SKIPPED."); + framework::ARM_COMPUTE_PRINT_INFO(); + + return; + } const auto boundary_value_vector = std::vector { @@ -336,7 +334,7 @@ template using NEActivationLayerFixture = ActivationValidationFixture; TEST_SUITE(Float) -#ifdef __ARM_FEATURE_FP16_VECTOR_ARITHMETIC +#ifdef ARM_COMPUTE_ENABLE_FP16 TEST_SUITE(FP16) TEST_CASE(SqrtBoundaryValue, framework::DatasetMode::ALL) { @@ -346,11 +344,19 @@ FIXTURE_DATA_TEST_CASE(RunSmall, NEActivationLayerFixture, framework::Data framework::dataset::make("DataType", DataType::F16))) { - // Validate output - validate(Accessor(_target), _reference, relative_tolerance(_data_type, _function), 0.f, absolute_tolerance(_data_type, _function)); + if(CPUInfo::get().has_fp16()) + { + // Validate output + validate(Accessor(_target), _reference, relative_tolerance(_data_type, _function), 0.f, absolute_tolerance(_data_type, _function)); + } + else + { + ARM_COMPUTE_TEST_INFO("Device does not support fp16 vector operations. Test SKIPPED."); + framework::ARM_COMPUTE_PRINT_INFO(); + } } TEST_SUITE_END() // FP16 -#endif /* __ARM_FEATURE_FP16_VECTOR_ARITHMETIC */ +#endif /* ARM_COMPUTE_ENABLE_FP16 */ TEST_SUITE(FP32) TEST_CASE(SqrtBoundaryValue, framework::DatasetMode::ALL) diff --git a/tests/validation/NEON/AddMulAdd.cpp b/tests/validation/NEON/AddMulAdd.cpp index 77e3d80fe6..5bb58ecb13 100644 --- a/tests/validation/NEON/AddMulAdd.cpp +++ b/tests/validation/NEON/AddMulAdd.cpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2023 Arm Limited. + * Copyright (c) 2023-2024 Arm Limited. 
* * SPDX-License-Identifier: MIT * @@ -127,27 +127,43 @@ FIXTURE_DATA_TEST_CASE(RunLarge, NEAddMulAddFloatFixture, framework::Data TEST_SUITE_END() // F32 -#ifdef __ARM_FEATURE_FP16_VECTOR_ARITHMETIC +#ifdef ARM_COMPUTE_ENABLE_FP16 TEST_SUITE(F16) FIXTURE_DATA_TEST_CASE(RunSmall, NEAddMulAddFloatFixture, framework::DatasetMode::PRECOMMIT, combine(combine(datasets::SmallShapes(), framework::dataset::make("DataType", DataType::F16)), ActivationFunctionsDataset)) { - // Validate outputs - validate(Accessor(_interm_target), _interm_reference); // Arithmetic Addition has more strict tolerance - validate(Accessor(_target), _reference, tolerance_fp16); + if(CPUInfo::get().has_fp16()) + { + // Validate outputs + validate(Accessor(_interm_target), _interm_reference); // Arithmetic Addition has more strict tolerance + validate(Accessor(_target), _reference, tolerance_fp16); + } + else + { + ARM_COMPUTE_TEST_INFO("Device does not support fp16 vector operations. Test SKIPPED."); + framework::ARM_COMPUTE_PRINT_INFO(); + } } FIXTURE_DATA_TEST_CASE(RunLarge, NEAddMulAddFloatFixture, framework::DatasetMode::NIGHTLY, combine(combine(datasets::LargeShapes(), framework::dataset::make("DataType", DataType::F16)), ActivationFunctionsDataset)) { - // Validate outputs - validate(Accessor(_interm_target), _interm_reference); // Arithmetic Addition has more strict tolerance - validate(Accessor(_target), _reference, tolerance_fp16); + if(CPUInfo::get().has_fp16()) + { + // Validate outputs + validate(Accessor(_interm_target), _interm_reference); // Arithmetic Addition has more strict tolerance + validate(Accessor(_target), _reference, tolerance_fp16); + } + else + { + ARM_COMPUTE_TEST_INFO("Device does not support fp16 vector operations.
Test SKIPPED."); + framework::ARM_COMPUTE_PRINT_INFO(); + } } TEST_SUITE_END() // F16 -#endif // __ARM_FEATURE_FP16_VECTOR_ARITHMETIC +#endif // ARM_COMPUTE_ENABLE_FP16 TEST_SUITE_END() // Float diff --git a/tests/validation/NEON/ArgMinMax.cpp b/tests/validation/NEON/ArgMinMax.cpp index 91b8128dea..acb127498e 100644 --- a/tests/validation/NEON/ArgMinMax.cpp +++ b/tests/validation/NEON/ArgMinMax.cpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2018-2021, 2023 Arm Limited. + * Copyright (c) 2018-2021, 2023-2024 Arm Limited. * * SPDX-License-Identifier: MIT * @@ -142,7 +142,8 @@ FIXTURE_DATA_TEST_CASE(RunLarge, TEST_SUITE_END() // S32 TEST_SUITE(Float) -#ifdef __ARM_FEATURE_FP16_VECTOR_ARITHMETIC + +#ifdef ARM_COMPUTE_ENABLE_FP16 TEST_SUITE(FP16) FIXTURE_DATA_TEST_CASE(RunSmall, NEArgMinMaxValidationFixture_F16_S32, @@ -153,8 +154,16 @@ FIXTURE_DATA_TEST_CASE(RunSmall, AxisDataset), OpsDataset)) { - // Validate output - validate(Accessor(_target), _reference); + if(CPUInfo::get().has_fp16()) + { + // Validate output + validate(Accessor(_target), _reference); + } + else + { + ARM_COMPUTE_TEST_INFO("Device does not support fp16 vector operations. Test SKIPPED."); + framework::ARM_COMPUTE_PRINT_INFO(); + } } FIXTURE_DATA_TEST_CASE(RunLarge, @@ -166,11 +175,19 @@ FIXTURE_DATA_TEST_CASE(RunLarge, AxisDataset), OpsDataset)) { - // Validate output - validate(Accessor(_target), _reference); + if(CPUInfo::get().has_fp16()) + { + // Validate output + validate(Accessor(_target), _reference); + } + else + { + ARM_COMPUTE_TEST_INFO("Device does not support fp16 vector operations. 
Test SKIPPED."); + framework::ARM_COMPUTE_PRINT_INFO(); + } } TEST_SUITE_END() // FP16 -#endif // __ARM_FEATURE_FP16_VECTOR_ARITHMETIC +#endif // ARM_COMPUTE_ENABLE_FP16 TEST_SUITE(FP32) FIXTURE_DATA_TEST_CASE(RunSmall, diff --git a/tests/validation/NEON/ArithmeticAddition.cpp b/tests/validation/NEON/ArithmeticAddition.cpp index 535c3e634e..7a7aa52041 100644 --- a/tests/validation/NEON/ArithmeticAddition.cpp +++ b/tests/validation/NEON/ArithmeticAddition.cpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2017-2023 Arm Limited. + * Copyright (c) 2017-2024 Arm Limited. * * SPDX-License-Identifier: MIT * @@ -209,17 +209,25 @@ TEST_SUITE_END() // S32 TEST_SUITE_END() // Integer TEST_SUITE(Float) -#ifdef __ARM_FEATURE_FP16_VECTOR_ARITHMETIC +#ifdef ARM_COMPUTE_ENABLE_FP16 TEST_SUITE(F16) FIXTURE_DATA_TEST_CASE(RunSmall, NEArithmeticAdditionFixture, framework::DatasetMode::ALL, combine(combine(combine(datasets::SmallShapes(), framework::dataset::make("DataType", DataType::F16)), framework::dataset::make("ConvertPolicy", { ConvertPolicy::SATURATE, ConvertPolicy::WRAP })), OutOfPlaceDataSet)) { - // Validate output - validate(Accessor(_target), _reference); + if(CPUInfo::get().has_fp16()) + { + // Validate output + validate(Accessor(_target), _reference); + } + else + { + ARM_COMPUTE_TEST_INFO("Device does not support fp16 vector operations. 
Test SKIPPED."); + framework::ARM_COMPUTE_PRINT_INFO(); + } } TEST_SUITE_END() // F16 -#endif /* __ARM_FEATURE_FP16_VECTOR_ARITHMETIC */ +#endif /* ARM_COMPUTE_ENABLE_FP16 */ TEST_SUITE(F32) FIXTURE_DATA_TEST_CASE(RunSmall, NEArithmeticAdditionFixture, framework::DatasetMode::PRECOMMIT, combine(combine(combine(datasets::SmallShapes(), framework::dataset::make("DataType", diff --git a/tests/validation/NEON/ArithmeticSubtraction.cpp b/tests/validation/NEON/ArithmeticSubtraction.cpp index 8886ca2db5..9a6032cd9e 100644 --- a/tests/validation/NEON/ArithmeticSubtraction.cpp +++ b/tests/validation/NEON/ArithmeticSubtraction.cpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2017-2021 Arm Limited. + * Copyright (c) 2017-2021, 2024 Arm Limited. * * SPDX-License-Identifier: MIT * @@ -276,18 +276,26 @@ FIXTURE_DATA_TEST_CASE(RunLarge, NEArithmeticSubtractionFixture, framew TEST_SUITE_END() // S32 TEST_SUITE(Float) -#ifdef __ARM_FEATURE_FP16_VECTOR_ARITHMETIC +#ifdef ARM_COMPUTE_ENABLE_FP16 TEST_SUITE(F16) FIXTURE_DATA_TEST_CASE(RunSmall, NEArithmeticSubtractionFixture, framework::DatasetMode::ALL, combine(combine(combine(datasets::SmallShapes(), framework::dataset::make("DataType", DataType::F16)), framework::dataset::make("ConvertPolicy", { ConvertPolicy::SATURATE, ConvertPolicy::WRAP })), OutOfPlaceDataSet)) { - // Validate output - validate(Accessor(_target), _reference); + if(CPUInfo::get().has_fp16()) + { + // Validate output + validate(Accessor(_target), _reference); + } + else + { + ARM_COMPUTE_TEST_INFO("Device does not support fp16 vector operations. 
Test SKIPPED."); + framework::ARM_COMPUTE_PRINT_INFO(); + } } TEST_SUITE_END() // F16 -#endif /* __ARM_FEATURE_FP16_VECTOR_ARITHMETIC */ +#endif /* ARM_COMPUTE_ENABLE_FP16 */ TEST_SUITE(F32) FIXTURE_DATA_TEST_CASE(RunSmall, NEArithmeticSubtractionFixture, framework::DatasetMode::PRECOMMIT, combine(combine(combine(datasets::SmallShapes(), framework::dataset::make("DataType", diff --git a/tests/validation/NEON/BatchConcatenateLayer.cpp b/tests/validation/NEON/BatchConcatenateLayer.cpp index 6eafe82f8a..e275a759cb 100644 --- a/tests/validation/NEON/BatchConcatenateLayer.cpp +++ b/tests/validation/NEON/BatchConcatenateLayer.cpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2019-2020 Arm Limited. + * Copyright (c) 2019-2020, 2024 Arm Limited. * * SPDX-License-Identifier: MIT * @@ -87,25 +87,41 @@ template using NEBatchConcatenateLayerFixture = ConcatenateLayerValidationFixture; TEST_SUITE(Float) -#ifdef __ARM_FEATURE_FP16_VECTOR_ARITHMETIC +#ifdef ARM_COMPUTE_ENABLE_FP16 TEST_SUITE(FP16) FIXTURE_DATA_TEST_CASE(RunSmall, NEBatchConcatenateLayerFixture, framework::DatasetMode::PRECOMMIT, combine(combine(concat(datasets::Small2DShapes(), datasets::Tiny4DShapes()), framework::dataset::make("DataType", DataType::F16)), framework::dataset::make("Axis", 3))) { - // Validate output - validate(Accessor(_target), _reference); + if(CPUInfo::get().has_fp16()) + { + // Validate output + validate(Accessor(_target), _reference); + } + else + { + ARM_COMPUTE_TEST_INFO("Device does not support fp16 vector operations. 
Test SKIPPED."); + framework::ARM_COMPUTE_PRINT_INFO(); + } } FIXTURE_DATA_TEST_CASE(RunLarge, NEBatchConcatenateLayerFixture, framework::DatasetMode::NIGHTLY, combine(combine(datasets::ConcatenateLayerShapes(), framework::dataset::make("DataType", DataType::F16)), framework::dataset::make("Axis", 3))) { - // Validate output - validate(Accessor(_target), _reference); + if(CPUInfo::get().has_fp16()) + { + // Validate output + validate(Accessor(_target), _reference); + } + else + { + ARM_COMPUTE_TEST_INFO("Device does not support fp16 vector operations. Test SKIPPED."); + framework::ARM_COMPUTE_PRINT_INFO(); + } } TEST_SUITE_END() -#endif /* __ARM_FEATURE_FP16_VECTOR_ARITHMETIC */ +#endif /* ARM_COMPUTE_ENABLE_FP16 */ TEST_SUITE(FP32) FIXTURE_DATA_TEST_CASE(RunSmall, NEBatchConcatenateLayerFixture, framework::DatasetMode::PRECOMMIT, combine(combine(concat(datasets::Small3DShapes(), datasets::Tiny4DShapes()), diff --git a/tests/validation/NEON/BatchNormalizationLayer.cpp b/tests/validation/NEON/BatchNormalizationLayer.cpp index 50eaf0c667..45661ab5d3 100644 --- a/tests/validation/NEON/BatchNormalizationLayer.cpp +++ b/tests/validation/NEON/BatchNormalizationLayer.cpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2017-2021 Arm Limited. + * Copyright (c) 2017-2021, 2024 Arm Limited. 
* * SPDX-License-Identifier: MIT * @@ -50,9 +50,9 @@ namespace { RelativeTolerance rel_tolerance_f32(0.05f); /**< Tolerance value for comparing reference's output against implementation's output for DataType::F32 */ constexpr AbsoluteTolerance abs_tolerance_f32(0.0001f); /**< Tolerance value for comparing reference's output against implementation's output for DataType::F32 */ -#ifdef __ARM_FEATURE_FP16_VECTOR_ARITHMETIC +#ifdef ARM_COMPUTE_ENABLE_FP16 constexpr AbsoluteTolerance abs_tolerance_f16(0.015f); /**< Tolerance value for comparing reference's output against implementation's output for DataType::F16 */ -#endif // __ARM_FEATURE_FP16_VECTOR_ARITHMETIC +#endif // ARM_COMPUTE_ENABLE_FP16 const auto act_infos = framework::dataset::make("ActivationInfo", { @@ -139,7 +139,7 @@ FIXTURE_DATA_TEST_CASE(RandomLarge, NEBatchNormalizationLayerFixture, fra } TEST_SUITE_END() // F32 -#ifdef __ARM_FEATURE_FP16_VECTOR_ARITHMETIC +#ifdef ARM_COMPUTE_ENABLE_FP16 TEST_SUITE(FP16) FIXTURE_DATA_TEST_CASE(RandomSmall, NEBatchNormalizationLayerFixture, framework::DatasetMode::PRECOMMIT, combine(combine(combine(combine(datasets::SmallRandomBatchNormalizationLayerDataset(), combine(framework::dataset::make("UseBeta", { false, true }), @@ -148,8 +148,16 @@ FIXTURE_DATA_TEST_CASE(RandomSmall, NEBatchNormalizationLayerFixture, fram framework::dataset::make("DataType", DataType::F16)), framework::dataset::make("DataLayout", { DataLayout::NCHW, DataLayout::NHWC }))) { - // Validate output - validate(Accessor(_target), _reference, abs_tolerance_f16, 0); + if(CPUInfo::get().has_fp16()) + { + // Validate output + validate(Accessor(_target), _reference, abs_tolerance_f16, 0); + } + else + { + ARM_COMPUTE_TEST_INFO("Device does not support fp16 vector operations. 
Test SKIPPED."); + framework::ARM_COMPUTE_PRINT_INFO(); + } } FIXTURE_DATA_TEST_CASE(RandomLarge, NEBatchNormalizationLayerFixture, framework::DatasetMode::PRECOMMIT, combine(combine(combine(combine(datasets::LargeRandomBatchNormalizationLayerDataset(), @@ -159,11 +167,19 @@ FIXTURE_DATA_TEST_CASE(RandomLarge, NEBatchNormalizationLayerFixture, fram framework::dataset::make("DataType", DataType::F16)), framework::dataset::make("DataLayout", { DataLayout::NCHW, DataLayout::NHWC }))) { - // Validate output - validate(Accessor(_target), _reference, abs_tolerance_f16, 0); + if(CPUInfo::get().has_fp16()) + { + // Validate output + validate(Accessor(_target), _reference, abs_tolerance_f16, 0); + } + else + { + ARM_COMPUTE_TEST_INFO("Device does not support fp16 vector operations. Test SKIPPED."); + framework::ARM_COMPUTE_PRINT_INFO(); + } } TEST_SUITE_END() // FP16 -#endif /* __ARM_FEATURE_FP16_VECTOR_ARITHMETIC */ +#endif /* ARM_COMPUTE_ENABLE_FP16 */ TEST_SUITE_END() // Float TEST_SUITE_END() // BatchNormalizationLayer diff --git a/tests/validation/NEON/BoundingBoxTransform.cpp b/tests/validation/NEON/BoundingBoxTransform.cpp index 2ca2434150..4ef18352ee 100644 --- a/tests/validation/NEON/BoundingBoxTransform.cpp +++ b/tests/validation/NEON/BoundingBoxTransform.cpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2019-2021 Arm Limited. + * Copyright (c) 2019-2021, 2024 Arm Limited. 
* * SPDX-License-Identifier: MIT * @@ -45,10 +45,10 @@ namespace { RelativeTolerance relative_tolerance_f32(0.01f); AbsoluteTolerance absolute_tolerance_f32(0.001f); -#ifdef __ARM_FEATURE_FP16_VECTOR_ARITHMETIC +#ifdef ARM_COMPUTE_ENABLE_FP16 RelativeTolerance relative_tolerance_f16(half(0.2)); AbsoluteTolerance absolute_tolerance_f16(half(0.02f)); -#endif // __ARM_FEATURE_FP16_VECTOR_ARITHMETIC +#endif // ARM_COMPUTE_ENABLE_FP16 constexpr AbsoluteTolerance tolerance_qasymm16(1); @@ -124,16 +124,24 @@ FIXTURE_DATA_TEST_CASE(BoundingBox, NEBoundingBoxTransformFixture, framew } TEST_SUITE_END() // FP32 -#ifdef __ARM_FEATURE_FP16_VECTOR_ARITHMETIC +#ifdef ARM_COMPUTE_ENABLE_FP16 TEST_SUITE(FP16) FIXTURE_DATA_TEST_CASE(BoundingBox, NEBoundingBoxTransformFixture, framework::DatasetMode::ALL, combine(combine(DeltaDataset, BboxInfoDataset), framework::dataset::make("DataType", { DataType::F16 }))) { - // Validate output - validate(Accessor(_target), _reference, relative_tolerance_f16, 0.03f, absolute_tolerance_f16); + if(CPUInfo::get().has_fp16()) + { + // Validate output + validate(Accessor(_target), _reference, relative_tolerance_f16, 0.03f, absolute_tolerance_f16); + } + else + { + ARM_COMPUTE_TEST_INFO("Device does not support fp16 vector operations. Test SKIPPED."); + framework::ARM_COMPUTE_PRINT_INFO(); + } } TEST_SUITE_END() // FP16 -#endif // __ARM_FEATURE_FP16_VECTOR_ARITHMETIC +#endif // ARM_COMPUTE_ENABLE_FP16 TEST_SUITE_END() // Float diff --git a/tests/validation/NEON/Cast.cpp b/tests/validation/NEON/Cast.cpp index b56594546b..668c60545b 100644 --- a/tests/validation/NEON/Cast.cpp +++ b/tests/validation/NEON/Cast.cpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2019-2023 Arm Limited. + * Copyright (c) 2019-2024 Arm Limited. 
* * SPDX-License-Identifier: MIT * @@ -140,7 +140,15 @@ using NECastToQASYMM8_SIGNEDFixture = CastValidationFixture, CastQASYMM8_SIGNEDtoS16Dataset, one_tolerance) CAST_SUITE(QASYMM8_SIGNED_to_S32, DataType::QASYMM8_SIGNED, DataType::S32, NECastToS32Fixture, CastQASYMM8_SIGNEDtoS32Dataset, one_tolerance) CAST_SUITE(QASYMM8_SIGNED_to_F32, DataType::QASYMM8_SIGNED, DataType::F32, NECastToF32Fixture, CastQASYMM8_SIGNEDtoF32Dataset, one_tolerance) -#ifdef __ARM_FEATURE_FP16_VECTOR_ARITHMETIC +#ifdef ARM_COMPUTE_ENABLE_FP16 CAST_SUITE(QASYMM8_SIGNED_to_F16, DataType::QASYMM8_SIGNED, DataType::F16, NECastToF16Fixture, CastQASYMM8_SIGNEDtoF16Dataset, one_tolerance) -#endif // __ARM_FEATURE_FP16_VECTOR_ARITHMETIC +#endif // ARM_COMPUTE_ENABLE_FP16 //QASYMM8 -#ifdef __ARM_FEATURE_FP16_VECTOR_ARITHMETIC +#ifdef ARM_COMPUTE_ENABLE_FP16 CAST_SUITE(QASYMM8_to_F16, DataType::QASYMM8, DataType::F16, NECastToF16Fixture, CastQASYMM8toF16Dataset, one_tolerance) -#endif // __ARM_FEATURE_FP16_VECTOR_ARITHMETIC +#endif // ARM_COMPUTE_ENABLE_FP16 CAST_SUITE(QASYMM8_to_F32, DataType::QASYMM8, DataType::F32, NECastToF32Fixture, CastQASYMM8toF32Dataset, one_tolerance) CAST_SUITE(QASYMM8_to_S32, DataType::QASYMM8, DataType::S32, NECastToS32Fixture, CastQASYMM8toS32Dataset, one_tolerance) @@ -177,26 +185,26 @@ CAST_SUITE(S16_to_S32, DataType::S16, DataType::S32, NECastToS32Fixture // S32 CAST_SUITE(S32_to_QASYMM8_SIGNED, DataType::S32, DataType::QASYMM8_SIGNED, NECastToQASYMM8_SIGNEDFixture, CastS32toQASYMM8_SIGNEDDataset, one_tolerance) CAST_SUITE(S32_to_QASYMM8, DataType::S32, DataType::QASYMM8, NECastToQASYMM8Fixture, CastS32toQASYMM8Dataset, one_tolerance) -#ifdef __ARM_FEATURE_FP16_VECTOR_ARITHMETIC +#ifdef ARM_COMPUTE_ENABLE_FP16 CAST_SUITE(S32_to_F16, DataType::S32, DataType::F16, NECastToF16Fixture, CastS32toF16Dataset, zero_tolerance) -#endif // __ARM_FEATURE_FP16_VECTOR_ARITHMETIC +#endif // ARM_COMPUTE_ENABLE_FP16 CAST_SUITE(S32_to_F32, DataType::S32, DataType::F32, 
NECastToF32Fixture, CastS32toF32Dataset, one_tolerance) CAST_SUITE(S32_to_U8, DataType::S32, DataType::U8, NECastToU8Fixture, CastS32toU8Dataset, one_tolerance) // F16 -#ifdef __ARM_FEATURE_FP16_VECTOR_ARITHMETIC +#ifdef ARM_COMPUTE_ENABLE_FP16 CAST_SUITE(F16_to_QASYMM8_SIGNED, DataType::F16, DataType::QASYMM8_SIGNED, NECastToQASYMM8_SIGNEDFixture, CastF16toQASYMM8_SIGNEDDataset, one_tolerance) CAST_SUITE(F16_to_QASYMM8, DataType::F16, DataType::QASYMM8, NECastToQASYMM8Fixture, CastF16toQASYMM8Dataset, one_tolerance) CAST_SUITE(F16_to_F32, DataType::F16, DataType::F32, NECastToF32Fixture, CastF16toF32Dataset, zero_tolerance) CAST_SUITE(F16_to_S32, DataType::F16, DataType::S32, NECastToS32Fixture, CastF16toS32Dataset, one_tolerance) -#endif // __ARM_FEATURE_FP16_VECTOR_ARITHMETIC +#endif // ARM_COMPUTE_ENABLE_FP16 // F32 CAST_SUITE(F32_to_QASYMM8_SIGNED, DataType::F32, DataType::QASYMM8_SIGNED, NECastToQASYMM8_SIGNEDFixture, CastF32toQASYMM8_SIGNEDDataset, one_tolerance) CAST_SUITE(F32_to_QASYMM8, DataType::F32, DataType::QASYMM8, NECastToQASYMM8Fixture, CastF32toQASYMM8Dataset, one_tolerance) -#ifdef __ARM_FEATURE_FP16_VECTOR_ARITHMETIC +#ifdef ARM_COMPUTE_ENABLE_FP16 CAST_SUITE(F32_to_F16, DataType::F32, DataType::F16, NECastToF16Fixture, CastF32toF16Dataset, zero_tolerance) -#endif // __ARM_FEATURE_FP16_VECTOR_ARITHMETIC +#endif // ARM_COMPUTE_ENABLE_FP16 CAST_SUITE(F32_to_S32, DataType::F32, DataType::S32, NECastToS32Fixture, CastF32toS32Dataset, one_tolerance) CAST_SUITE(F32_to_U8, DataType::F32, DataType::S32, NECastToS32Fixture, CastF32toS32Dataset, one_tolerance) diff --git a/tests/validation/NEON/ChannelShuffle.cpp b/tests/validation/NEON/ChannelShuffle.cpp index 9a2a9f24f0..c1590dc136 100644 --- a/tests/validation/NEON/ChannelShuffle.cpp +++ b/tests/validation/NEON/ChannelShuffle.cpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2018-2021 Arm Limited. + * Copyright (c) 2018-2021, 2024 Arm Limited. 
* * SPDX-License-Identifier: MIT * @@ -92,26 +92,42 @@ FIXTURE_DATA_TEST_CASE(RunLarge, NEChannelShuffleLayerFixture, framewor TEST_SUITE_END() // U8 TEST_SUITE(Float) -#ifdef __ARM_FEATURE_FP16_VECTOR_ARITHMETIC +#ifdef ARM_COMPUTE_ENABLE_FP16 TEST_SUITE(FP16) FIXTURE_DATA_TEST_CASE(RunSmall, NEChannelShuffleLayerFixture, framework::DatasetMode::PRECOMMIT, combine(combine(datasets::SmallRandomChannelShuffleLayerDataset(), framework::dataset::make("DataType", DataType::F16)), framework::dataset::make("DataLayout", { DataLayout::NCHW, DataLayout::NHWC }))) { - // Validate output - validate(Accessor(_target), _reference); + if(CPUInfo::get().has_fp16()) + { + // Validate output + validate(Accessor(_target), _reference); + } + else + { + ARM_COMPUTE_TEST_INFO("Device does not support fp16 vector operations. Test SKIPPED."); + framework::ARM_COMPUTE_PRINT_INFO(); + } } FIXTURE_DATA_TEST_CASE(RunLarge, NEChannelShuffleLayerFixture, framework::DatasetMode::NIGHTLY, combine(combine(datasets::LargeRandomChannelShuffleLayerDataset(), framework::dataset::make("DataType", DataType::F16)), framework::dataset::make("DataLayout", { DataLayout::NCHW, DataLayout::NHWC }))) { - // Validate output - validate(Accessor(_target), _reference); + if(CPUInfo::get().has_fp16()) + { + // Validate output + validate(Accessor(_target), _reference); + } + else + { + ARM_COMPUTE_TEST_INFO("Device does not support fp16 vector operations. 
Test SKIPPED."); + framework::ARM_COMPUTE_PRINT_INFO(); + } } TEST_SUITE_END() // FP16 -#endif /* __ARM_FEATURE_FP16_VECTOR_ARITHMETIC */ +#endif /* ARM_COMPUTE_ENABLE_FP16 */ TEST_SUITE(FP32) FIXTURE_DATA_TEST_CASE(RunSmall, NEChannelShuffleLayerFixture, framework::DatasetMode::PRECOMMIT, combine(combine(datasets::SmallRandomChannelShuffleLayerDataset(), diff --git a/tests/validation/NEON/Comparisons.cpp b/tests/validation/NEON/Comparisons.cpp index b77bcdd4f0..868c39d306 100644 --- a/tests/validation/NEON/Comparisons.cpp +++ b/tests/validation/NEON/Comparisons.cpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2019-2020 Arm Limited. + * Copyright (c) 2019-2020, 2024 Arm Limited. * * SPDX-License-Identifier: MIT * @@ -97,26 +97,42 @@ FIXTURE_DATA_TEST_CASE(RunSmall, TEST_SUITE_END() TEST_SUITE(Float) -#ifdef __ARM_FEATURE_FP16_VECTOR_ARITHMETIC +#ifdef ARM_COMPUTE_ENABLE_FP16 TEST_SUITE(FP16) FIXTURE_DATA_TEST_CASE(RunSmall, NEComparisonFixture, framework::DatasetMode::PRECOMMIT, combine(run_small_dataset, framework::dataset::make("DataType", DataType::F16))) { - // Validate output - validate(Accessor(_target), _reference); + if(CPUInfo::get().has_fp16()) + { + // Validate output + validate(Accessor(_target), _reference); + } + else + { + ARM_COMPUTE_TEST_INFO("Device does not support fp16 vector operations. Test SKIPPED."); + framework::ARM_COMPUTE_PRINT_INFO(); + } } FIXTURE_DATA_TEST_CASE(RunLarge, NEComparisonFixture, framework::DatasetMode::NIGHTLY, combine(run_large_dataset, framework::dataset::make("DataType", DataType::F16))) { - // Validate output - validate(Accessor(_target), _reference); + if(CPUInfo::get().has_fp16()) + { + // Validate output + validate(Accessor(_target), _reference); + } + else + { + ARM_COMPUTE_TEST_INFO("Device does not support fp16 vector operations. 
Test SKIPPED."); + framework::ARM_COMPUTE_PRINT_INFO(); + } } TEST_SUITE_END() // FP16 -#endif /* __ARM_FEATURE_FP16_VECTOR_ARITHMETIC */ +#endif /* ARM_COMPUTE_ENABLE_FP16 */ TEST_SUITE(FP32) FIXTURE_DATA_TEST_CASE(RunSmall, diff --git a/tests/validation/NEON/ConvertFullyConnectedWeights.cpp b/tests/validation/NEON/ConvertFullyConnectedWeights.cpp index 65dbbcc55b..c52b50fa5b 100644 --- a/tests/validation/NEON/ConvertFullyConnectedWeights.cpp +++ b/tests/validation/NEON/ConvertFullyConnectedWeights.cpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2018-2021 Arm Limited. + * Copyright (c) 2018-2021, 2024 Arm Limited. * * SPDX-License-Identifier: MIT * @@ -97,22 +97,38 @@ FIXTURE_DATA_TEST_CASE(RunLarge, NEConvertFullyConnectedWeightsFixture, f } TEST_SUITE_END() // FP32 -#ifdef __ARM_FEATURE_FP16_VECTOR_ARITHMETIC +#ifdef ARM_COMPUTE_ENABLE_FP16 TEST_SUITE(FP16) FIXTURE_DATA_TEST_CASE(RunSmall, NEConvertFullyConnectedWeightsFixture, framework::DatasetMode::ALL, combine(datasets::Small3DShapes(), combine(params, framework::dataset::make("DataType", DataType::F16)))) { - // Validate output - validate(Accessor(_target), _reference); + if(CPUInfo::get().has_fp16()) + { + // Validate output + validate(Accessor(_target), _reference); + } + else + { + ARM_COMPUTE_TEST_INFO("Device does not support fp16 vector operations. Test SKIPPED."); + framework::ARM_COMPUTE_PRINT_INFO(); + } } FIXTURE_DATA_TEST_CASE(RunLarge, NEConvertFullyConnectedWeightsFixture, framework::DatasetMode::NIGHTLY, combine(datasets::Large3DShapes(), combine(params, framework::dataset::make("DataType", DataType::F16)))) { - // Validate output - validate(Accessor(_target), _reference); + if(CPUInfo::get().has_fp16()) + { + // Validate output + validate(Accessor(_target), _reference); + } + else + { + ARM_COMPUTE_TEST_INFO("Device does not support fp16 vector operations. 
Test SKIPPED."); + framework::ARM_COMPUTE_PRINT_INFO(); + } } TEST_SUITE_END() // FP16 -#endif /* __ARM_FEATURE_FP16_VECTOR_ARITHMETIC */ +#endif /* ARM_COMPUTE_ENABLE_FP16 */ TEST_SUITE(QASYMM8) FIXTURE_DATA_TEST_CASE(RunSmall, NEConvertFullyConnectedWeightsFixture, framework::DatasetMode::ALL, combine(datasets::Small3DShapes(), combine(params, framework::dataset::make("DataType", diff --git a/tests/validation/NEON/Convolution3D.cpp b/tests/validation/NEON/Convolution3D.cpp index 4185488742..76046a8b85 100644 --- a/tests/validation/NEON/Convolution3D.cpp +++ b/tests/validation/NEON/Convolution3D.cpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2021 Arm Limited. + * Copyright (c) 2021, 2024 Arm Limited. * * SPDX-License-Identifier: MIT * @@ -43,11 +43,11 @@ namespace validation { namespace { -#ifdef __ARM_FEATURE_FP16_VECTOR_ARITHMETIC +#ifdef ARM_COMPUTE_ENABLE_FP16 const RelativeTolerance rel_tolerance_f16(half_float::half(0.2f)); /**< Relative tolerance value for FP16 types */ const AbsoluteTolerance abs_tolerance_f16(0.2f); /**< Absolute tolerance for FP16 types */ constexpr float tolerance_num = 0.07f; /**< Tolerance number for the FP16 implementation */ -#endif /* __ARM_FEATURE_FP16_VECTOR_ARITHMETIC */ +#endif /* ARM_COMPUTE_ENABLE_FP16 */ constexpr AbsoluteTolerance tolerance_fp32(0.001f); /**< Tolerance for floating point tests */ constexpr AbsoluteTolerance tolerance_qasymm8(1); /**< Tolerance for quantized tests */ @@ -140,17 +140,25 @@ FIXTURE_DATA_TEST_CASE(RunSmall, NEDirectConvolution3DFixture, framework: } TEST_SUITE_END() // FP32 -#ifdef __ARM_FEATURE_FP16_VECTOR_ARITHMETIC +#ifdef ARM_COMPUTE_ENABLE_FP16 TEST_SUITE(FP16) FIXTURE_DATA_TEST_CASE(RunSmall, NEDirectConvolution3DFixture, framework::DatasetMode::PRECOMMIT, combine(combine(data_precommit, framework::dataset::make("DataType", DataType::F16)), framework::dataset::make("DataLayout", { DataLayout::NDHWC }))) { - // Validate output - validate(Accessor(_target), _reference, rel_tolerance_f16, 
tolerance_num, abs_tolerance_f16); + if(CPUInfo::get().has_fp16()) + { + // Validate output + validate(Accessor(_target), _reference, rel_tolerance_f16, tolerance_num, abs_tolerance_f16); + } + else + { + ARM_COMPUTE_TEST_INFO("Device does not support fp16 vector operations. Test SKIPPED."); + framework::ARM_COMPUTE_PRINT_INFO(); + } } TEST_SUITE_END() // FP16 -#endif /* __ARM_FEATURE_FP16_VECTOR_ARITHMETIC */ +#endif /* ARM_COMPUTE_ENABLE_FP16 */ TEST_SUITE_END() // Float diff --git a/tests/validation/NEON/ConvolutionLayer.cpp b/tests/validation/NEON/ConvolutionLayer.cpp index d739d4e1a4..acbac5f776 100644 --- a/tests/validation/NEON/ConvolutionLayer.cpp +++ b/tests/validation/NEON/ConvolutionLayer.cpp @@ -74,16 +74,16 @@ const RelativeTolerance rel_tolerance_winograd_3x3_f32(0.05f); /**< Relat const AbsoluteTolerance abs_tolerance_f32(0.002f); /**< Absolute tolerance for FP32 types */ const AbsoluteTolerance abs_tolerance_1xN_f32(0.0041f); /**< Absolute tolerance for FP32 types */ -#ifdef __ARM_FEATURE_FP16_VECTOR_ARITHMETIC +#ifdef ARM_COMPUTE_ENABLE_FP16 const AbsoluteTolerance tolerance_convolution_layer_f16(half(0.4f)); constexpr float tolerance_num_f16 = 0.15f; -#endif /* __ARM_FEATURE_FP16_VECTOR_ARITHMETIC */ +#endif /* ARM_COMPUTE_ENABLE_FP16 */ -#ifdef __ARM_FEATURE_FP16_VECTOR_ARITHMETIC +#ifdef ARM_COMPUTE_ENABLE_FP16 const RelativeTolerance rel_tolerance_f16(half_float::half(0.2f)); /**< Relative tolerance value for FP16 types */ const AbsoluteTolerance abs_tolerance_f16(0.2f); /**< Absolute tolerance for FP16 types */ constexpr float tolerance_num = 0.07f; /**< Tolerance number for the FP16 implementation */ -#endif /* __ARM_FEATURE_FP16_VECTOR_ARITHMETIC */ +#endif /* ARM_COMPUTE_ENABLE_FP16 */ #ifdef ARM_COMPUTE_ENABLE_SME // TODO(COMPMID-6011): SME kernels and the reference model use different rounding mode. 
@@ -96,9 +96,9 @@ constexpr AbsoluteTolerance tolerance_qasymm8(0.0); /**< Tolerance value /** CNN data types */ const auto CNNDataTypes = make("DataType", { -#ifdef __ARM_FEATURE_FP16_VECTOR_ARITHMETIC +#ifdef ARM_COMPUTE_ENABLE_FP16 DataType::F16, -#endif /* __ARM_FEATURE_FP16_VECTOR_ARITHMETIC */ +#endif /* ARM_COMPUTE_ENABLE_FP16 */ DataType::F32, DataType::QASYMM8, }); @@ -648,9 +648,9 @@ FIXTURE_DATA_TEST_CASE(RunSmallNoBias, NEWinogradConvolutionLayerNoBiasFixture; +using NEWinogradConvolutionLayerFastMathFixture16 = WinogradConvolutionLayerFastMathValidationFixture; DATA_TEST_CASE(ValidateConvolutionMethod, framework::DatasetMode::ALL, zip( make("InputInfo", { TensorInfo(TensorShape(18U, 18U, 32U), 1, DataType::F16), @@ -673,37 +673,61 @@ DATA_TEST_CASE(ValidateConvolutionMethod, framework::DatasetMode::ALL, zip( make("Expected", { ConvolutionMethod::GEMM, ConvolutionMethod::WINOGRAD })), input_info, weights_info, output_info, conv_info, fast_math, expected) { - ConvolutionMethod is_valid = NEConvolutionLayer::get_convolution_method(&input_info.clone()->set_is_resizable(true), - &weights_info.clone()->set_is_resizable(true), - &output_info.clone()->set_is_resizable(true), conv_info, WeightsInfo(), Size2D(1U, 1U), ActivationLayerInfo(), fast_math); - ARM_COMPUTE_EXPECT(is_valid == expected, framework::LogLevel::ERRORS); + if(CPUInfo::get().has_fp16()) + { + ConvolutionMethod is_valid = NEConvolutionLayer::get_convolution_method(&input_info.clone()->set_is_resizable(true), + &weights_info.clone()->set_is_resizable(true), + &output_info.clone()->set_is_resizable(true), conv_info, WeightsInfo(), Size2D(1U, 1U), ActivationLayerInfo(), fast_math); + ARM_COMPUTE_EXPECT(is_valid == expected, framework::LogLevel::ERRORS); + } + else + { + ARM_COMPUTE_TEST_INFO("Device does not support fp16 vector operations. 
Test SKIPPED."); + framework::ARM_COMPUTE_PRINT_INFO(); + } } TEST_SUITE(Conv3x3) -FIXTURE_DATA_TEST_CASE(RunSmall, CLWinogradConvolutionLayerFastMathFixture16, framework::DatasetMode::PRECOMMIT, +FIXTURE_DATA_TEST_CASE(RunSmall, NEWinogradConvolutionLayerFastMathFixture16, framework::DatasetMode::PRECOMMIT, combine(datasets::SmallWinogradConvolutionLayer3x3Dataset(), make("DataType", { DataType::F16 }), ActivationFunctionsDataset, make("DataLayout", { DataLayout::NCHW, DataLayout::NHWC }))) { - // Validate output - validate(Accessor(_target), _reference, tolerance_convolution_layer_f16, tolerance_num_f16); + if(CPUInfo::get().has_fp16()) + { + // Validate output + validate(Accessor(_target), _reference, tolerance_convolution_layer_f16, tolerance_num_f16); + } + else + { + ARM_COMPUTE_TEST_INFO("Device does not support fp16 vector operations. Test SKIPPED."); + framework::ARM_COMPUTE_PRINT_INFO(); + } } -FIXTURE_DATA_TEST_CASE(RunLarge, CLWinogradConvolutionLayerFastMathFixture16, framework::DatasetMode::NIGHTLY, +FIXTURE_DATA_TEST_CASE(RunLarge, NEWinogradConvolutionLayerFastMathFixture16, framework::DatasetMode::NIGHTLY, combine(datasets::LargeWinogradConvolutionLayer3x3Dataset(), make("DataType", { DataType::F16 }), make("ActivationInfo", { ActivationLayerInfo() }), make("DataLayout", { DataLayout::NHWC }))) { - // Validate output - validate(Accessor(_target), _reference, tolerance_convolution_layer_f16, tolerance_num_f16); + if(CPUInfo::get().has_fp16()) + { + // Validate output + validate(Accessor(_target), _reference, tolerance_convolution_layer_f16, tolerance_num_f16); + } + else + { + ARM_COMPUTE_TEST_INFO("Device does not support fp16 vector operations. 
Test SKIPPED."); + framework::ARM_COMPUTE_PRINT_INFO(); + } } TEST_SUITE_END() // Conv3x3 TEST_SUITE_END() // FP16 -#endif /* __ARM_FEATURE_FP16_VECTOR_ARITHMETIC */ +#endif /* ARM_COMPUTE_ENABLE_FP16 */ TEST_SUITE_END() // WinogradLayer #ifdef ARM_COMPUTE_ENABLE_FIXED_FORMAT_KERNELS @@ -990,16 +1014,24 @@ FIXTURE_DATA_TEST_CASE(RunSmallFloat, VarWidth, framework::DatasetMode::A validate(Accessor(_target), _reference, rel_tolerance_f32, 0.f, float(abs_tolerance_f32)); } -#if defined(__ARM_FEATURE_FP16_VECTOR_ARITHMETIC) +#if defined(ARM_COMPUTE_ENABLE_FP16) FIXTURE_DATA_TEST_CASE(RunSmallHalf, VarWidth, framework::DatasetMode::ALL, combine(combine(datasets::SmallConvolutionLayerDataset(), framework::dataset::make("DataLayout", { DataLayout::NHWC })), framework::dataset::make("ACL Scalar type", { DataType::F16 }))) { - // Validate output - validate(Accessor(_target), _reference, rel_tolerance_f16, 0.f, half(abs_tolerance_f16)); + if(CPUInfo::get().has_fp16()) + { + // Validate output + validate(Accessor(_target), _reference, rel_tolerance_f16, 0.f, half(abs_tolerance_f16)); + } + else + { + ARM_COMPUTE_TEST_INFO("Device does not support fp16 vector operations. 
Test SKIPPED."); + framework::ARM_COMPUTE_PRINT_INFO(); + } } -#endif // __ARM_FEATURE_FP16_VECTOR_ARITHMETIC +#endif // ARM_COMPUTE_ENABLE_FP16 #if defined(ARM_COMPUTE_ENABLE_BF16) template @@ -1031,16 +1063,24 @@ FIXTURE_DATA_TEST_CASE(NEGEMMRunSmallFloat, NEGEMMVarWidth, framework::Da validate(Accessor(_target), _reference, rel_tolerance_f32, 0.f, float(abs_tolerance_f32)); } -#if defined(__ARM_FEATURE_FP16_VECTOR_ARITHMETIC) +#if defined(ARM_COMPUTE_ENABLE_FP16) FIXTURE_DATA_TEST_CASE(NEGEMMRunSmallHalf, NEGEMMVarWidth, framework::DatasetMode::ALL, combine(combine(datasets::SmallConvolutionLayerDataset(), framework::dataset::make("DataLayout", { DataLayout::NHWC })), framework::dataset::make("ACL Scalar type", { DataType::F16 }))) { - // Validate output - validate(Accessor(_target), _reference, rel_tolerance_f16, 0.f, half(abs_tolerance_f16)); + if(CPUInfo::get().has_fp16()) + { + // Validate output + validate(Accessor(_target), _reference, rel_tolerance_f16, 0.f, half(abs_tolerance_f16)); + } + else + { + ARM_COMPUTE_TEST_INFO("Device does not support fp16 vector operations. 
Test SKIPPED."); + framework::ARM_COMPUTE_PRINT_INFO(); + } } -#endif // __ARM_FEATURE_FP16_VECTOR_ARITHMETIC +#endif // ARM_COMPUTE_ENABLE_FP16 #if defined(ARM_COMPUTE_ENABLE_BF16) template @@ -1179,7 +1219,7 @@ FIXTURE_DATA_TEST_CASE(RunSmall, NEGEMMConvolutionLayerFixture, framework TEST_SUITE_END() // BFLOAT16 #endif /* defined(ARM_COMPUTE_ENABLE_BF16) */ -#ifdef __ARM_FEATURE_FP16_VECTOR_ARITHMETIC +#ifdef ARM_COMPUTE_ENABLE_FP16 TEST_SUITE(FP16) FIXTURE_DATA_TEST_CASE(RunSmall, NEGEMMConvolutionLayerFixture, framework::DatasetMode::ALL, combine(combine(combine(combine(datasets::SmallConvolutionLayerDataset(), framework::dataset::make("ReshapeWeights", { true })), @@ -1187,11 +1227,19 @@ FIXTURE_DATA_TEST_CASE(RunSmall, NEGEMMConvolutionLayerFixture, framework: framework::dataset::make("DataLayout", { DataLayout::NCHW })), ActivationFunctionsDataset)) { - // Validate output - validate(Accessor(_target), _reference, rel_tolerance_f16, tolerance_num, abs_tolerance_f16); + if(CPUInfo::get().has_fp16()) + { + // Validate output + validate(Accessor(_target), _reference, rel_tolerance_f16, tolerance_num, abs_tolerance_f16); + } + else + { + ARM_COMPUTE_TEST_INFO("Device does not support fp16 vector operations. Test SKIPPED."); + framework::ARM_COMPUTE_PRINT_INFO(); + } } TEST_SUITE_END() // FP16 -#endif /* __ARM_FEATURE_FP16_VECTOR_ARITHMETIC */ +#endif /* ARM_COMPUTE_ENABLE_FP16 */ TEST_SUITE(FP32) FIXTURE_DATA_TEST_CASE(RunSmall, NEGEMMConvolutionLayerFixture, framework::DatasetMode::ALL, combine(combine(combine(combine(datasets::SmallConvolutionLayerDataset(), diff --git a/tests/validation/NEON/CropResize.cpp b/tests/validation/NEON/CropResize.cpp index df7166bfdc..53614c7e51 100644 --- a/tests/validation/NEON/CropResize.cpp +++ b/tests/validation/NEON/CropResize.cpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2019-2021 Arm Limited. + * Copyright (c) 2019-2021, 2024 Arm Limited. 
* * SPDX-License-Identifier: MIT * @@ -92,7 +92,7 @@ DATA_TEST_CASE(Validate, framework::DatasetMode::ALL, zip(zip(zip(zip( // *INDENT-ON* TEST_SUITE(Float) -#ifdef __ARM_FEATURE_FP16_VECTOR_ARITHMETIC +#ifdef ARM_COMPUTE_ENABLE_FP16 TEST_SUITE(F16) FIXTURE_DATA_TEST_CASE(RunSmall, NECropResizeFixture, @@ -101,11 +101,19 @@ FIXTURE_DATA_TEST_CASE(RunSmall, combine(framework::dataset::make("IsOutOfBounds", { true, false }), framework::dataset::make("DataType", DataType::F16)))) { - // Validate output - validate(Accessor(_target), _reference, tolerance_fp32, 0.01); + if(CPUInfo::get().has_fp16()) + { + // Validate output + validate(Accessor(_target), _reference, tolerance_fp32, 0.01); + } + else + { + ARM_COMPUTE_TEST_INFO("Device does not support fp16 vector operations. Test SKIPPED."); + framework::ARM_COMPUTE_PRINT_INFO(); + } } TEST_SUITE_END() // F16 -#endif /* __ARM_FEATURE_FP16_VECTOR_ARITHMETIC */ +#endif /* ARM_COMPUTE_ENABLE_FP16 */ TEST_SUITE(F32) FIXTURE_DATA_TEST_CASE(RunSmall, diff --git a/tests/validation/NEON/DeconvolutionLayer.cpp b/tests/validation/NEON/DeconvolutionLayer.cpp index b4c049f6f9..4ec2714957 100644 --- a/tests/validation/NEON/DeconvolutionLayer.cpp +++ b/tests/validation/NEON/DeconvolutionLayer.cpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2017-2021, 2023 Arm Limited. + * Copyright (c) 2017-2021, 2023-2024 Arm Limited. 
* * SPDX-License-Identifier: MIT * @@ -45,10 +45,10 @@ namespace { constexpr AbsoluteTolerance tolerance_fp32(0.001f); /**< Tolerance for floating point tests */ constexpr AbsoluteTolerance tolerance_quantized(1.0f); /**< Tolerance value for comparing reference's output against implementation's output for quantized data types */ -#ifdef __ARM_FEATURE_FP16_VECTOR_ARITHMETIC +#ifdef ARM_COMPUTE_ENABLE_FP16 const RelativeTolerance tolerance_fp16(half_float::half(0.2f)); /**< Relative tolerance value for comparing reference's output against implementation's output for DataType::F16 */ constexpr float tolerance_num_fp16 = 0.02f; /**< Tolerance number for FP16 tests -- follows a slightly stricter approach compared to ConvolutionLayer tests */ -#endif /* __ARM_FEATURE_FP16_VECTOR_ARITHMETIC*/ +#endif /* ARM_COMPUTE_ENABLE_FP16*/ constexpr float tolerance_num_quant = 0.07f; /**< Tolerance number for quantized types */ const auto data4x4 = datasets::SmallDeconvolutionShapes() * framework::dataset::make("StrideX", 1, 4) * framework::dataset::make("StrideY", 1, 4) * framework::dataset::make("PadX", 0, 3) @@ -276,15 +276,23 @@ FIXTURE_DATA_TEST_CASE(Run, NEDeconvolutionLayerFixture5x1, framework::Da TEST_SUITE_END() // W5x1 TEST_SUITE_END() // FP32 -#ifdef __ARM_FEATURE_FP16_VECTOR_ARITHMETIC +#ifdef ARM_COMPUTE_ENABLE_FP16 TEST_SUITE(FP16) TEST_SUITE(W4x4) FIXTURE_DATA_TEST_CASE(Run, NEDeconvolutionLayerFixture4x4, framework::DatasetMode::NIGHTLY, combine(combine(combine(data4x4, framework::dataset::make("DataType", DataType::F16)), data_layouts_dataset), add_bias_dataset)) { - // Validate output - validate(Accessor(_target), _reference, tolerance_fp16, tolerance_num_fp16); + if(CPUInfo::get().has_fp16()) + { + // Validate output + validate(Accessor(_target), _reference, tolerance_fp16, tolerance_num_fp16); + } + else + { + ARM_COMPUTE_TEST_INFO("Device does not support fp16 vector operations. 
Test SKIPPED."); + framework::ARM_COMPUTE_PRINT_INFO(); + } } TEST_SUITE_END() // W4x4 TEST_SUITE(W3x3) @@ -293,15 +301,31 @@ FIXTURE_DATA_TEST_CASE(RunSmall, NEDeconvolutionLayerFixture3x3, framework data_layouts_dataset), add_bias_dataset)) { - // Validate output - validate(Accessor(_target), _reference, tolerance_fp16, tolerance_num_fp16); + if(CPUInfo::get().has_fp16()) + { + // Validate output + validate(Accessor(_target), _reference, tolerance_fp16, tolerance_num_fp16); + } + else + { + ARM_COMPUTE_TEST_INFO("Device does not support fp16 vector operations. Test SKIPPED."); + framework::ARM_COMPUTE_PRINT_INFO(); + } } FIXTURE_DATA_TEST_CASE(RunLarge, NEDeconvolutionLayerFixture3x3, framework::DatasetMode::NIGHTLY, combine(combine(combine(data3x3, framework::dataset::make("DataType", DataType::F16)), data_layouts_dataset), add_bias_dataset)) { - // Validate output - validate(Accessor(_target), _reference, tolerance_fp16, tolerance_num_fp16); + if(CPUInfo::get().has_fp16()) + { + // Validate output + validate(Accessor(_target), _reference, tolerance_fp16, tolerance_num_fp16); + } + else + { + ARM_COMPUTE_TEST_INFO("Device does not support fp16 vector operations. Test SKIPPED."); + framework::ARM_COMPUTE_PRINT_INFO(); + } } TEST_SUITE_END() // W3x3 TEST_SUITE(W1x1) @@ -309,8 +333,16 @@ FIXTURE_DATA_TEST_CASE(Run, NEDeconvolutionLayerFixture1x1, framework::Dat data_layouts_dataset), add_bias_dataset)) { - // Validate output - validate(Accessor(_target), _reference, tolerance_fp16, tolerance_num_fp16); + if(CPUInfo::get().has_fp16()) + { + // Validate output + validate(Accessor(_target), _reference, tolerance_fp16, tolerance_num_fp16); + } + else + { + ARM_COMPUTE_TEST_INFO("Device does not support fp16 vector operations. 
Test SKIPPED."); + framework::ARM_COMPUTE_PRINT_INFO(); + } } TEST_SUITE_END() // W1x1 TEST_SUITE(W5x1) @@ -318,12 +350,20 @@ FIXTURE_DATA_TEST_CASE(Run, NEDeconvolutionLayerFixture5x1, framework::Dat data_layouts_dataset), add_bias_dataset)) { - // Validate output - validate(Accessor(_target), _reference, tolerance_fp16, tolerance_num_fp16); + if(CPUInfo::get().has_fp16()) + { + // Validate output + validate(Accessor(_target), _reference, tolerance_fp16, tolerance_num_fp16); + } + else + { + ARM_COMPUTE_TEST_INFO("Device does not support fp16 vector operations. Test SKIPPED."); + framework::ARM_COMPUTE_PRINT_INFO(); + } } TEST_SUITE_END() // W5x1 TEST_SUITE_END() // FP16 -#endif /* __ARM_FEATURE_FP16_VECTOR_ARITHMETIC */ +#endif /* ARM_COMPUTE_ENABLE_FP16 */ TEST_SUITE_END() // Float diff --git a/tests/validation/NEON/DepthConcatenateLayer.cpp b/tests/validation/NEON/DepthConcatenateLayer.cpp index 1c69d44a2b..11fc9d89ae 100644 --- a/tests/validation/NEON/DepthConcatenateLayer.cpp +++ b/tests/validation/NEON/DepthConcatenateLayer.cpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2017-2020 Arm Limited. + * Copyright (c) 2017-2020, 2024 Arm Limited. * * SPDX-License-Identifier: MIT * @@ -84,25 +84,41 @@ template using NEDepthConcatenateLayerFixture = ConcatenateLayerValidationFixture; TEST_SUITE(Float) -#ifdef __ARM_FEATURE_FP16_VECTOR_ARITHMETIC +#ifdef ARM_COMPUTE_ENABLE_FP16 TEST_SUITE(FP16) FIXTURE_DATA_TEST_CASE(RunSmall, NEDepthConcatenateLayerFixture, framework::DatasetMode::PRECOMMIT, combine(combine(concat(datasets::Small2DShapes(), datasets::Tiny4DShapes()), framework::dataset::make("DataType", DataType::F16)), framework::dataset::make("Axis", 2))) { - // Validate output - validate(Accessor(_target), _reference); + if(CPUInfo::get().has_fp16()) + { + // Validate output + validate(Accessor(_target), _reference); + } + else + { + ARM_COMPUTE_TEST_INFO("Device does not support fp16 vector operations. 
Test SKIPPED."); + framework::ARM_COMPUTE_PRINT_INFO(); + } } FIXTURE_DATA_TEST_CASE(RunLarge, NEDepthConcatenateLayerFixture, framework::DatasetMode::NIGHTLY, combine(combine(datasets::ConcatenateLayerShapes(), framework::dataset::make("DataType", DataType::F16)), framework::dataset::make("Axis", 2))) { - // Validate output - validate(Accessor(_target), _reference); + if(CPUInfo::get().has_fp16()) + { + // Validate output + validate(Accessor(_target), _reference); + } + else + { + ARM_COMPUTE_TEST_INFO("Device does not support fp16 vector operations. Test SKIPPED."); + framework::ARM_COMPUTE_PRINT_INFO(); + } } TEST_SUITE_END() -#endif /* __ARM_FEATURE_FP16_VECTOR_ARITHMETIC */ +#endif /* ARM_COMPUTE_ENABLE_FP16 */ TEST_SUITE(FP32) FIXTURE_DATA_TEST_CASE(RunSmall, NEDepthConcatenateLayerFixture, framework::DatasetMode::PRECOMMIT, combine(combine(concat(datasets::Small3DShapes(), datasets::Tiny4DShapes()), diff --git a/tests/validation/NEON/DepthConvertLayer.cpp b/tests/validation/NEON/DepthConvertLayer.cpp index 4972708144..bd7c8faa9b 100644 --- a/tests/validation/NEON/DepthConvertLayer.cpp +++ b/tests/validation/NEON/DepthConvertLayer.cpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2017-2023 Arm Limited. + * Copyright (c) 2017-2024 Arm Limited. 
* * SPDX-License-Identifier: MIT * @@ -74,9 +74,9 @@ const auto DepthConvertLayerZeroShiftDataset = framework::dataset::make("Shif constexpr AbsoluteTolerance tolerance_qasymm8(1); constexpr AbsoluteTolerance tolerance_one_int32(1); -#ifdef __ARM_FEATURE_FP16_VECTOR_ARITHMETIC +#ifdef ARM_COMPUTE_ENABLE_FP16 constexpr AbsoluteTolerance tolerance_one_uint8(1); -#endif /* __ARM_FEATURE_FP16_VECTOR_ARITHMETIC */ +#endif /* ARM_COMPUTE_ENABLE_FP16 */ } // namespace TEST_SUITE(NEON) @@ -250,25 +250,41 @@ FIXTURE_DATA_TEST_CASE(RunLarge, NEDepthConvertLayerToF32Fixture, frame } TEST_SUITE_END() // U8_to_F32 -#ifdef __ARM_FEATURE_FP16_VECTOR_ARITHMETIC +#ifdef ARM_COMPUTE_ENABLE_FP16 TEST_SUITE(U8_to_F16) FIXTURE_DATA_TEST_CASE(RunSmall, NEDepthConvertLayerToF16Fixture, framework::DatasetMode::PRECOMMIT, combine(combine(combine(datasets::SmallShapes(), DepthConvertLayerU8toF16Dataset), framework::dataset::make("ConvertPolicy", { ConvertPolicy::SATURATE, ConvertPolicy::WRAP })), DepthConvertLayerZeroShiftDataset)) { - // Validate output - validate(Accessor(_target), _reference); + if(CPUInfo::get().has_fp16()) + { + // Validate output + validate(Accessor(_target), _reference); + } + else + { + ARM_COMPUTE_TEST_INFO("Device does not support fp16 vector operations. Test SKIPPED."); + framework::ARM_COMPUTE_PRINT_INFO(); + } } FIXTURE_DATA_TEST_CASE(RunLarge, NEDepthConvertLayerToF16Fixture, framework::DatasetMode::NIGHTLY, combine(combine(combine(datasets::LargeShapes(), DepthConvertLayerU8toF16Dataset), framework::dataset::make("ConvertPolicy", { ConvertPolicy::SATURATE, ConvertPolicy::WRAP })), DepthConvertLayerZeroShiftDataset)) { - // Validate output - validate(Accessor(_target), _reference); + if(CPUInfo::get().has_fp16()) + { + // Validate output + validate(Accessor(_target), _reference); + } + else + { + ARM_COMPUTE_TEST_INFO("Device does not support fp16 vector operations. 
Test SKIPPED."); + framework::ARM_COMPUTE_PRINT_INFO(); + } } TEST_SUITE_END() // U8_to_F36 -#endif // __ARM_FEATURE_FP16_VECTOR_ARITHMETIC +#endif // ARM_COMPUTE_ENABLE_FP16 TEST_SUITE(U16_to_U8) FIXTURE_DATA_TEST_CASE(RunSmall, NEDepthConvertLayerToU8Fixture, framework::DatasetMode::PRECOMMIT, combine(combine(combine(datasets::SmallShapes(), DepthConvertLayerU16toU8Dataset), @@ -338,7 +354,7 @@ FIXTURE_DATA_TEST_CASE(RunLarge, NEDepthConvertLayerToS32Fixture, frame } TEST_SUITE_END() // S16_to_S32 -#ifdef __ARM_FEATURE_FP16_VECTOR_ARITHMETIC +#ifdef ARM_COMPUTE_ENABLE_FP16 TEST_SUITE(F16_to_QASYMM8) FIXTURE_DATA_TEST_CASE(RunSmall, NEDepthConvertLayerToQASYMM8Fixture, framework::DatasetMode::PRECOMMIT, combine(combine(combine(combine(datasets::SmallShapes(), DepthConvertLayerF16toQASYMM8Dataset), @@ -346,8 +362,16 @@ FIXTURE_DATA_TEST_CASE(RunSmall, NEDepthConvertLayerToQASYMM8Fixture, fram DepthConvertLayerZeroShiftDataset), framework::dataset::make("QuantizationInfo", { QuantizationInfo(0.5f, 10) }))) { - // Validate output - validate(Accessor(_target), _reference, tolerance_qasymm8); + if(CPUInfo::get().has_fp16()) + { + // Validate output + validate(Accessor(_target), _reference, tolerance_qasymm8); + } + else + { + ARM_COMPUTE_TEST_INFO("Device does not support fp16 vector operations. 
Test SKIPPED."); + framework::ARM_COMPUTE_PRINT_INFO(); + } } FIXTURE_DATA_TEST_CASE(RunLarge, NEDepthConvertLayerToQASYMM8Fixture, framework::DatasetMode::NIGHTLY, combine(combine(combine(combine(datasets::LargeShapes(), DepthConvertLayerF16toQASYMM8Dataset), @@ -355,8 +379,16 @@ FIXTURE_DATA_TEST_CASE(RunLarge, NEDepthConvertLayerToQASYMM8Fixture, fram DepthConvertLayerZeroShiftDataset), framework::dataset::make("QuantizationInfo", { QuantizationInfo(0.5f, 10) }))) { - // Validate output - validate(Accessor(_target), _reference, tolerance_qasymm8); + if(CPUInfo::get().has_fp16()) + { + // Validate output + validate(Accessor(_target), _reference, tolerance_qasymm8); + } + else + { + ARM_COMPUTE_TEST_INFO("Device does not support fp16 vector operations. Test SKIPPED."); + framework::ARM_COMPUTE_PRINT_INFO(); + } } TEST_SUITE_END() // F16_to_QASYMM8 @@ -365,15 +397,31 @@ FIXTURE_DATA_TEST_CASE(RunSmall, NEDepthConvertLayerToU8Fixture, framework framework::dataset::make("ConvertPolicy", { ConvertPolicy::SATURATE, ConvertPolicy::WRAP })), DepthConvertLayerZeroShiftDataset)) { - // Validate output - validate(Accessor(_target), _reference, tolerance_one_uint8); + if(CPUInfo::get().has_fp16()) + { + // Validate output + validate(Accessor(_target), _reference, tolerance_one_uint8); + } + else + { + ARM_COMPUTE_TEST_INFO("Device does not support fp16 vector operations. 
Test SKIPPED."); + framework::ARM_COMPUTE_PRINT_INFO(); + } } FIXTURE_DATA_TEST_CASE(RunLarge, NEDepthConvertLayerToU8Fixture, framework::DatasetMode::NIGHTLY, combine(combine(combine(datasets::LargeShapes(), DepthConvertLayerF16toU8Dataset), framework::dataset::make("ConvertPolicy", { ConvertPolicy::SATURATE, ConvertPolicy::WRAP })), DepthConvertLayerZeroShiftDataset)) { - // Validate output - validate(Accessor(_target), _reference, tolerance_one_uint8); + if(CPUInfo::get().has_fp16()) + { + // Validate output + validate(Accessor(_target), _reference, tolerance_one_uint8); + } + else + { + ARM_COMPUTE_TEST_INFO("Device does not support fp16 vector operations. Test SKIPPED."); + framework::ARM_COMPUTE_PRINT_INFO(); + } } TEST_SUITE_END() // F16_to_U8 @@ -382,15 +430,31 @@ FIXTURE_DATA_TEST_CASE(RunSmall, NEDepthConvertLayerToF32Fixture, framewor framework::dataset::make("ConvertPolicy", { ConvertPolicy::SATURATE, ConvertPolicy::WRAP })), DepthConvertLayerZeroShiftDataset)) { - // Validate output - validate(Accessor(_target), _reference); + if(CPUInfo::get().has_fp16()) + { + // Validate output + validate(Accessor(_target), _reference); + } + else + { + ARM_COMPUTE_TEST_INFO("Device does not support fp16 vector operations. Test SKIPPED."); + framework::ARM_COMPUTE_PRINT_INFO(); + } } FIXTURE_DATA_TEST_CASE(RunLarge, NEDepthConvertLayerToF32Fixture, framework::DatasetMode::NIGHTLY, combine(combine(combine(datasets::LargeShapes(), DepthConvertLayerF16toF32Dataset), framework::dataset::make("ConvertPolicy", { ConvertPolicy::SATURATE, ConvertPolicy::WRAP })), DepthConvertLayerZeroShiftDataset)) { - // Validate output - validate(Accessor(_target), _reference); + if(CPUInfo::get().has_fp16()) + { + // Validate output + validate(Accessor(_target), _reference); + } + else + { + ARM_COMPUTE_TEST_INFO("Device does not support fp16 vector operations. 
Test SKIPPED."); + framework::ARM_COMPUTE_PRINT_INFO(); + } } TEST_SUITE_END() // F16_to_F32 @@ -399,15 +463,31 @@ FIXTURE_DATA_TEST_CASE(RunSmall, NEDepthConvertLayerToS32Fixture, framewor framework::dataset::make("ConvertPolicy", { ConvertPolicy::SATURATE, ConvertPolicy::WRAP })), DepthConvertLayerZeroShiftDataset)) { - // Validate output - validate(Accessor(_target), _reference, tolerance_one_int32); + if(CPUInfo::get().has_fp16()) + { + // Validate output + validate(Accessor(_target), _reference, tolerance_one_int32); + } + else + { + ARM_COMPUTE_TEST_INFO("Device does not support fp16 vector operations. Test SKIPPED."); + framework::ARM_COMPUTE_PRINT_INFO(); + } } FIXTURE_DATA_TEST_CASE(RunLarge, NEDepthConvertLayerToS32Fixture, framework::DatasetMode::NIGHTLY, combine(combine(combine(datasets::LargeShapes(), DepthConvertLayerF16toS32Dataset), framework::dataset::make("ConvertPolicy", { ConvertPolicy::SATURATE, ConvertPolicy::WRAP })), DepthConvertLayerZeroShiftDataset)) { - // Validate output - validate(Accessor(_target), _reference, tolerance_one_int32); + if(CPUInfo::get().has_fp16()) + { + // Validate output + validate(Accessor(_target), _reference, tolerance_one_int32); + } + else + { + ARM_COMPUTE_TEST_INFO("Device does not support fp16 vector operations. 
Test SKIPPED."); + framework::ARM_COMPUTE_PRINT_INFO(); + } } TEST_SUITE_END() // F16_to_S32 @@ -419,8 +499,16 @@ FIXTURE_DATA_TEST_CASE(RunSmall, NEDepthConvertLayerQuantizedToF16Fixture, framework::DatasetMode::NIGHTLY, combine(combine(combine(combine(datasets::LargeShapes(), DepthConvertLayerQASYMM8toF16Dataset), @@ -428,8 +516,16 @@ FIXTURE_DATA_TEST_CASE(RunLarge, NEDepthConvertLayerQuantizedToF16Fixture, framewo framework::dataset::make("ConvertPolicy", { ConvertPolicy::SATURATE, ConvertPolicy::WRAP })), DepthConvertLayerZeroShiftDataset)) { - // Validate output - validate(Accessor(_target), _reference); + if(CPUInfo::get().has_fp16()) + { + // Validate output + validate(Accessor(_target), _reference); + } + else + { + ARM_COMPUTE_TEST_INFO("Device does not support fp16 vector operations. Test SKIPPED."); + framework::ARM_COMPUTE_PRINT_INFO(); + } } FIXTURE_DATA_TEST_CASE(RunLarge, NEDepthConvertLayerToF16Fixture, framework::DatasetMode::NIGHTLY, combine(combine(combine(datasets::LargeShapes(), DepthConvertLayerF32toF16Dataset), framework::dataset::make("ConvertPolicy", { ConvertPolicy::SATURATE, ConvertPolicy::WRAP })), DepthConvertLayerZeroShiftDataset)) { - // Validate output - validate(Accessor(_target), _reference); + if(CPUInfo::get().has_fp16()) + { + // Validate output + validate(Accessor(_target), _reference); + } + else + { + ARM_COMPUTE_TEST_INFO("Device does not support fp16 vector operations. 
Test SKIPPED."); + framework::ARM_COMPUTE_PRINT_INFO(); + } } TEST_SUITE_END() // F32_to_F16 @@ -455,19 +567,35 @@ FIXTURE_DATA_TEST_CASE(RunSmall, NEDepthConvertLayerToF16Fixture, frame framework::dataset::make("ConvertPolicy", { ConvertPolicy::SATURATE, ConvertPolicy::WRAP })), DepthConvertLayerZeroShiftDataset)) { - // Validate output - validate(Accessor(_target), _reference); + if(CPUInfo::get().has_fp16()) + { + // Validate output + validate(Accessor(_target), _reference); + } + else + { + ARM_COMPUTE_TEST_INFO("Device does not support fp16 vector operations. Test SKIPPED."); + framework::ARM_COMPUTE_PRINT_INFO(); + } } FIXTURE_DATA_TEST_CASE(RunLarge, NEDepthConvertLayerToF16Fixture, framework::DatasetMode::NIGHTLY, combine(combine(combine(datasets::LargeShapes(), DepthConvertLayerS32toF16Dataset), framework::dataset::make("ConvertPolicy", { ConvertPolicy::SATURATE, ConvertPolicy::WRAP })), DepthConvertLayerZeroShiftDataset)) { - // Validate output - validate(Accessor(_target), _reference); + if(CPUInfo::get().has_fp16()) + { + // Validate output + validate(Accessor(_target), _reference); + } + else + { + ARM_COMPUTE_TEST_INFO("Device does not support fp16 vector operations. 
Test SKIPPED."); + framework::ARM_COMPUTE_PRINT_INFO(); + } } TEST_SUITE_END() // S32_to_F16 -#endif /* __ARM_FEATURE_FP16_VECTOR_ARITHMETIC */ +#endif /* ARM_COMPUTE_ENABLE_FP16 */ TEST_SUITE(F32_to_S32) FIXTURE_DATA_TEST_CASE(RunSmall, NEDepthConvertLayerToS32Fixture, framework::DatasetMode::PRECOMMIT, combine(combine(combine(datasets::SmallShapes(), DepthConvertLayerF32toS32Dataset), diff --git a/tests/validation/NEON/DepthwiseConvolutionLayer.cpp b/tests/validation/NEON/DepthwiseConvolutionLayer.cpp index e9609b7b72..2d948f3e32 100644 --- a/tests/validation/NEON/DepthwiseConvolutionLayer.cpp +++ b/tests/validation/NEON/DepthwiseConvolutionLayer.cpp @@ -50,10 +50,10 @@ namespace constexpr RelativeTolerance tolerance_f32(0.01f); /**< Tolerance value for comparing reference's output against implementation's output for DataType::F32 */ constexpr AbsoluteTolerance tolerance_qasymm8(1); /**< Tolerance value for comparing reference's output against implementation's output for DataType::QASYMM8 */ constexpr AbsoluteTolerance tolerance_qasymm8_signed(1); /**< Tolerance value for comparing reference's output against implementation's output for DataType::QASYMM8_SIGNED */ -#ifdef __ARM_FEATURE_FP16_VECTOR_ARITHMETIC +#ifdef ARM_COMPUTE_ENABLE_FP16 RelativeTolerance tolerance_f16(half_float::half(0.02)); /**< Tolerance value for comparing reference's output against implementation's output for DataType::F16 */ constexpr float tolerance_num = 0.05f; /**< Tolerance number */ -#endif // __ARM_FEATURE_FP16_VECTOR_ARITHMETIC +#endif // ARM_COMPUTE_ENABLE_FP16 const auto depth_multipliers = make("DepthMultiplier", { 1, 2, 8 }); const auto large_depth_multipliers = make("DepthMultiplier", { 5, 32 }); @@ -469,7 +469,7 @@ FIXTURE_DATA_TEST_CASE_NEW(RunVariableWeightsLarge3x3, NEDepthwiseConvolutionLay TEST_SUITE_END() // Optimized TEST_SUITE_END() // F32 -#ifdef __ARM_FEATURE_FP16_VECTOR_ARITHMETIC +#ifdef ARM_COMPUTE_ENABLE_FP16 TEST_SUITE(F16) 
FIXTURE_DATA_TEST_CASE_NEW(RunActivations, NEDepthwiseConvolutionLayerFixture, framework::DatasetMode::NIGHTLY, @@ -483,7 +483,15 @@ FIXTURE_DATA_TEST_CASE_NEW(RunActivations, NEDepthwiseConvolutionLayerFixture, f make("DataLayout", { DataLayout::NCHW, DataLayout::NHWC })), ActivationFunctionsDataset)) { - validate(Accessor(_target), _reference, tolerance_f16, tolerance_num); + if(CPUInfo::get().has_fp16()) + { + validate(Accessor(_target), _reference, tolerance_f16, tolerance_num); + } + else + { + ARM_COMPUTE_TEST_INFO("Device does not support fp16 vector operations. Test SKIPPED."); + framework::ARM_COMPUTE_PRINT_INFO(); + } } FIXTURE_DATA_TEST_CASE_NEW(RunLarge, NEDepthwiseConvolutionLayerFixture, framework::DatasetMode::NIGHTLY, combine(combine(combine(combine(datasets::LargeDepthwiseConvolutionLayerDataset(), large_depth_multipliers), @@ -503,7 +519,15 @@ FIXTURE_DATA_TEST_CASE_NEW(RunLarge, NEDepthwiseConvolutionLayerFixture, f make("DataLayout", { DataLayout::NHWC })), make("ActivationInfo", { ActivationLayerInfo() }))) { - validate(Accessor(_target), _reference, tolerance_f16, tolerance_num); + if(CPUInfo::get().has_fp16()) + { + validate(Accessor(_target), _reference, tolerance_f16, tolerance_num); + } + else + { + ARM_COMPUTE_TEST_INFO("Device does not support fp16 vector operations. Test SKIPPED."); + framework::ARM_COMPUTE_PRINT_INFO(); + } } TEST_SUITE(Dilation) @@ -514,7 +538,15 @@ FIXTURE_DATA_TEST_CASE_NEW(RunSmall, NEDepthwiseConvolutionLayerFixture, f make("DataLayout", { DataLayout::NHWC })), ActivationFunctionsDataset)) { - validate(Accessor(_target), _reference, tolerance_f16, tolerance_num); + if(CPUInfo::get().has_fp16()) + { + validate(Accessor(_target), _reference, tolerance_f16, tolerance_num); + } + else + { + ARM_COMPUTE_TEST_INFO("Device does not support fp16 vector operations. 
Test SKIPPED."); + framework::ARM_COMPUTE_PRINT_INFO(); + } } FIXTURE_DATA_TEST_CASE_NEW(RunLarge, NEDepthwiseConvolutionLayerFixture, framework::DatasetMode::NIGHTLY, combine(combine(combine(combine(datasets::LargeDepthwiseDilatedConvolutionLayerDataset(), @@ -523,7 +555,15 @@ FIXTURE_DATA_TEST_CASE_NEW(RunLarge, NEDepthwiseConvolutionLayerFixture, f make("DataLayout", { DataLayout::NHWC })), make("ActivationInfo", { ActivationLayerInfo() }))) { - validate(Accessor(_target), _reference, tolerance_f16, tolerance_num); + if(CPUInfo::get().has_fp16()) + { + validate(Accessor(_target), _reference, tolerance_f16, tolerance_num); + } + else + { + ARM_COMPUTE_TEST_INFO("Device does not support fp16 vector operations. Test SKIPPED."); + framework::ARM_COMPUTE_PRINT_INFO(); + } } TEST_SUITE_END() // Dilation @@ -538,7 +578,15 @@ FIXTURE_DATA_TEST_CASE_NEW(RunSmall, NEDepthwiseConvolutionLayerFixture, f make("DataLayout", { DataLayout::NCHW, DataLayout::NHWC })), ActivationFunctionsDataset)) { - validate(Accessor(_target), _reference, tolerance_f16); + if(CPUInfo::get().has_fp16()) + { + validate(Accessor(_target), _reference, tolerance_f16); + } + else + { + ARM_COMPUTE_TEST_INFO("Device does not support fp16 vector operations. Test SKIPPED."); + framework::ARM_COMPUTE_PRINT_INFO(); + } } FIXTURE_DATA_TEST_CASE_NEW(RunLarge, NEDepthwiseConvolutionLayerFixture, framework::DatasetMode::NIGHTLY, combine(combine(combine(combine(datasets::LargeDepthwiseConvolutionLayerDataset3x3(), @@ -548,7 +596,15 @@ FIXTURE_DATA_TEST_CASE_NEW(RunLarge, NEDepthwiseConvolutionLayerFixture, f make("DataLayout", { DataLayout::NHWC })), make("ActivationInfo", { ActivationLayerInfo() }))) { - validate(Accessor(_target), _reference, tolerance_f16); + if(CPUInfo::get().has_fp16()) + { + validate(Accessor(_target), _reference, tolerance_f16); + } + else + { + ARM_COMPUTE_TEST_INFO("Device does not support fp16 vector operations. 
Test SKIPPED."); + framework::ARM_COMPUTE_PRINT_INFO(); + } } TEST_SUITE(Dilation) @@ -561,7 +617,15 @@ FIXTURE_DATA_TEST_CASE_NEW(RunSmall, NEDepthwiseConvolutionLayerFixture, f make("DataLayout", { DataLayout::NCHW, DataLayout::NHWC })), ActivationFunctionsDataset)) { - validate(Accessor(_target), _reference, tolerance_f16); + if(CPUInfo::get().has_fp16()) + { + validate(Accessor(_target), _reference, tolerance_f16); + } + else + { + ARM_COMPUTE_TEST_INFO("Device does not support fp16 vector operations. Test SKIPPED."); + framework::ARM_COMPUTE_PRINT_INFO(); + } } FIXTURE_DATA_TEST_CASE_NEW(RunLarge, NEDepthwiseConvolutionLayerFixture, framework::DatasetMode::NIGHTLY, combine(combine(combine(combine(datasets::LargeDepthwiseDilatedConvolutionLayerDataset3x3(), @@ -571,7 +635,15 @@ FIXTURE_DATA_TEST_CASE_NEW(RunLarge, NEDepthwiseConvolutionLayerFixture, f make("DataLayout", { DataLayout::NHWC })), make("ActivationInfo", { ActivationLayerInfo() }))) { - validate(Accessor(_target), _reference, tolerance_f16); + if(CPUInfo::get().has_fp16()) + { + validate(Accessor(_target), _reference, tolerance_f16); + } + else + { + ARM_COMPUTE_TEST_INFO("Device does not support fp16 vector operations. 
Test SKIPPED."); + framework::ARM_COMPUTE_PRINT_INFO(); + } } TEST_SUITE_END() // Dilation @@ -586,7 +658,15 @@ FIXTURE_DATA_TEST_CASE_NEW(RunSmallW3x3, NEDepthwiseConvolutionLayerFixture, framework::DatasetMode::PRECOMMIT, combine(combine(combine(combine(datasets::SmallOptimizedDepthwiseConvolutionLayerDataset5x5(), @@ -596,7 +676,15 @@ FIXTURE_DATA_TEST_CASE_NEW(RunSmallW5x5, NEDepthwiseConvolutionLayerFixture, framework::DatasetMode::NIGHTLY, combine(combine(combine(combine(datasets::LargeOptimizedDepthwiseConvolutionLayerDataset3x3(), @@ -606,11 +694,19 @@ FIXTURE_DATA_TEST_CASE_NEW(RunLargeW3x3, NEDepthwiseConvolutionLayerFixture using NEDequantizationLayerFixture = DequantizationValidationFixture; -#ifdef __ARM_FEATURE_FP16_VECTOR_ARITHMETIC +#ifdef ARM_COMPUTE_ENABLE_FP16 TEST_SUITE(FP16) FIXTURE_DATA_TEST_CASE(RunSmall, NEDequantizationLayerFixture, framework::DatasetMode::PRECOMMIT, dataset_precommit_f16) { - // Validate output - validate(Accessor(_target), _reference); + if(CPUInfo::get().has_fp16()) + { + // Validate output + validate(Accessor(_target), _reference); + } + else + { + ARM_COMPUTE_TEST_INFO("Device does not support fp16 vector operations. Test SKIPPED."); + framework::ARM_COMPUTE_PRINT_INFO(); + } } FIXTURE_DATA_TEST_CASE(RunLarge, NEDequantizationLayerFixture, framework::DatasetMode::NIGHTLY, dataset_nightly_f16) { - // Validate output - validate(Accessor(_target), _reference); + if(CPUInfo::get().has_fp16()) + { + // Validate output + validate(Accessor(_target), _reference); + } + else + { + ARM_COMPUTE_TEST_INFO("Device does not support fp16 vector operations. 
Test SKIPPED."); + framework::ARM_COMPUTE_PRINT_INFO(); + } } TEST_SUITE_END() // FP16 -#endif /* __ARM_FEATURE_FP16_VECTOR_ARITHMETIC */ +#endif /* ARM_COMPUTE_ENABLE_FP16 */ TEST_SUITE(FP32) FIXTURE_DATA_TEST_CASE(RunSmall, NEDequantizationLayerFixture, framework::DatasetMode::PRECOMMIT, dataset_precommit_f32) diff --git a/tests/validation/NEON/DilatedConvolutionLayer.cpp b/tests/validation/NEON/DilatedConvolutionLayer.cpp index fbfe8b8a7a..08ef68ecea 100644 --- a/tests/validation/NEON/DilatedConvolutionLayer.cpp +++ b/tests/validation/NEON/DilatedConvolutionLayer.cpp @@ -45,19 +45,19 @@ namespace validation namespace { const AbsoluteTolerance tolerance_f32(0.001f); /**< Tolerance value for comparing reference's output against implementation's output for DataType::F32 */ -#ifdef __ARM_FEATURE_FP16_VECTOR_ARITHMETIC +#ifdef ARM_COMPUTE_ENABLE_FP16 const AbsoluteTolerance abs_tolerance_f16(0.3f); /**< Absolute tolerance value for comparing reference's output against implementation's output for DataType::F16 */ const RelativeTolerance rel_tolerance_f16(half_float::half(0.2f)); /**< Relative tolerance value for comparing reference's output against implementation's output for DataType::F16 */ constexpr float tolerance_num_f16 = 0.07f; /**< Tolerance number for FP16 */ -#endif /* __ARM_FEATURE_FP16_VECTOR_ARITHMETIC */ +#endif /* ARM_COMPUTE_ENABLE_FP16 */ constexpr AbsoluteTolerance tolerance_qasymm8(1); /**< Tolerance value for comparing reference's output against implementation's output for quantized data types */ /** CNN data types */ const auto CNNDataTypes = framework::dataset::make("DataType", { -#ifdef __ARM_FEATURE_FP16_VECTOR_ARITHMETIC +#ifdef ARM_COMPUTE_ENABLE_FP16 DataType::F16, -#endif /* __ARM_FEATURE_FP16_VECTOR_ARITHMETIC */ +#endif /* ARM_COMPUTE_ENABLE_FP16 */ DataType::F32, DataType::QASYMM8, }); @@ -113,7 +113,7 @@ template using NEGEMMDilatedConvolutionLayerFixture = ConvolutionValidationFixture; TEST_SUITE(Float) -#ifdef 
__ARM_FEATURE_FP16_VECTOR_ARITHMETIC +#ifdef ARM_COMPUTE_ENABLE_FP16 TEST_SUITE(FP16) FIXTURE_DATA_TEST_CASE(RunSmall, NEGEMMDilatedConvolutionLayerFixture, framework::DatasetMode::PRECOMMIT, combine(combine(combine(combine(datasets::SmallDilatedConvolutionLayerDataset(), framework::dataset::make("ReshapeWeights", { true })), @@ -121,8 +121,16 @@ FIXTURE_DATA_TEST_CASE(RunSmall, NEGEMMDilatedConvolutionLayerFixture, fra framework::dataset::make("DataLayout", { DataLayout::NCHW })), framework::dataset::make("ActivationLayerInfo", ActivationLayerInfo()))) { - // Validate output - validate(Accessor(_target), _reference, rel_tolerance_f16, tolerance_num_f16, abs_tolerance_f16); + if(CPUInfo::get().has_fp16()) + { + // Validate output + validate(Accessor(_target), _reference, rel_tolerance_f16, tolerance_num_f16, abs_tolerance_f16); + } + else + { + ARM_COMPUTE_TEST_INFO("Device does not support fp16 vector operations. Test SKIPPED."); + framework::ARM_COMPUTE_PRINT_INFO(); + } } FIXTURE_DATA_TEST_CASE(RunLarge, NEGEMMDilatedConvolutionLayerFixture, framework::DatasetMode::NIGHTLY, combine(combine(combine(combine(datasets::LargeDilatedConvolutionLayerDataset(), framework::dataset::make("ReshapeWeights", { true })), @@ -130,11 +138,19 @@ FIXTURE_DATA_TEST_CASE(RunLarge, NEGEMMDilatedConvolutionLayerFixture, fra framework::dataset::make("DataLayout", { DataLayout::NCHW })), framework::dataset::make("ActivationLayerInfo", ActivationLayerInfo()))) { - // Validate output - validate(Accessor(_target), _reference, rel_tolerance_f16, tolerance_num_f16, abs_tolerance_f16); + if(CPUInfo::get().has_fp16()) + { + // Validate output + validate(Accessor(_target), _reference, rel_tolerance_f16, tolerance_num_f16, abs_tolerance_f16); + } + else + { + ARM_COMPUTE_TEST_INFO("Device does not support fp16 vector operations. 
Test SKIPPED."); + framework::ARM_COMPUTE_PRINT_INFO(); + } } TEST_SUITE_END() // FP16 -#endif /* __ARM_FEATURE_FP16_VECTOR_ARITHMETIC */ +#endif /* ARM_COMPUTE_ENABLE_FP16 */ TEST_SUITE(FP32) FIXTURE_DATA_TEST_CASE(RunSmall, NEGEMMDilatedConvolutionLayerFixture, framework::DatasetMode::PRECOMMIT, combine(combine(combine(combine(datasets::SmallDilatedConvolutionLayerDataset(), diff --git a/tests/validation/NEON/DirectConvolutionLayer.cpp b/tests/validation/NEON/DirectConvolutionLayer.cpp index 0779c9d388..c00e4d6789 100644 --- a/tests/validation/NEON/DirectConvolutionLayer.cpp +++ b/tests/validation/NEON/DirectConvolutionLayer.cpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2017-2023 Arm Limited. + * Copyright (c) 2017-2024 Arm Limited. * * SPDX-License-Identifier: MIT * @@ -342,15 +342,31 @@ FIXTURE_DATA_TEST_CASE(RunSmall, NEDirectConvolutionLayerFixture, framewor ActivationFunctionsDataset), framework::dataset::make("DataLayout", DataLayout::NCHW))) { - // Validate output - validate(Accessor(_target), _reference, rel_tolerance_f16, tolerance_num, abs_tolerance_f16); + if(CPUInfo::get().has_fp16()) + { + // Validate output + validate(Accessor(_target), _reference, rel_tolerance_f16, tolerance_num, abs_tolerance_f16); + } + else + { + ARM_COMPUTE_TEST_INFO("Device does not support fp16 vector operations. 
Test SKIPPED."); + framework::ARM_COMPUTE_PRINT_INFO(); + } } FIXTURE_DATA_TEST_CASE(RunLarge, NEDirectConvolutionLayerFixture, framework::DatasetMode::NIGHTLY, combine(combine(combine(data_f16_nightly, framework::dataset::make("DataType", DataType::F16)), ActivationFunctionsDataset), framework::dataset::make("DataLayout", DataLayout::NCHW))) { - // Validate output - validate(Accessor(_target), _reference, rel_tolerance_f16, tolerance_num, abs_tolerance_f16); + if(CPUInfo::get().has_fp16()) + { + // Validate output + validate(Accessor(_target), _reference, rel_tolerance_f16, tolerance_num, abs_tolerance_f16); + } + else + { + ARM_COMPUTE_TEST_INFO("Device does not support fp16 vector operations. Test SKIPPED."); + framework::ARM_COMPUTE_PRINT_INFO(); + } } TEST_SUITE_END() // FP16 #endif /* __ARM_FEATURE_FP16_VECTOR_ARITHMETIC */ diff --git a/tests/validation/NEON/ElementwiseAbsoluteValue.cpp b/tests/validation/NEON/ElementwiseAbsoluteValue.cpp index 0667ac73f9..3eab2a7a5f 100644 --- a/tests/validation/NEON/ElementwiseAbsoluteValue.cpp +++ b/tests/validation/NEON/ElementwiseAbsoluteValue.cpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2019-2021, 2023 Arm Limited. + * Copyright (c) 2019-2021, 2023-2024 Arm Limited. 
* * SPDX-License-Identifier: MIT * @@ -43,9 +43,9 @@ namespace validation namespace { RelativeTolerance tolerance_fp32(0.000001f); -#ifdef __ARM_FEATURE_FP16_VECTOR_ARITHMETIC +#ifdef ARM_COMPUTE_ENABLE_FP16 RelativeTolerance tolerance_fp16(0.01f); -#endif // __ARM_FEATURE_FP16_VECTOR_ARITHMETIC +#endif // ARM_COMPUTE_ENABLE_FP16 #if defined(__aarch64__) constexpr AbsoluteTolerance tolerance_qasymm8(0); constexpr AbsoluteTolerance tolerance_qasymm8_signed(0); @@ -64,23 +64,39 @@ template using NEAbsLayerQuantizedFixture = AbsQuantizedValidationFixture; TEST_SUITE(Float) -#ifdef __ARM_FEATURE_FP16_VECTOR_ARITHMETIC +#ifdef ARM_COMPUTE_ENABLE_FP16 TEST_SUITE(FP16) FIXTURE_DATA_TEST_CASE(RunSmall, NEAbsLayerFixture, framework::DatasetMode::PRECOMMIT, combine(datasets::SmallShapes(), framework::dataset::make("DataType", DataType::F16))) { - // Validate output - validate(Accessor(_target), _reference, tolerance_fp16); + if(CPUInfo::get().has_fp16()) + { + // Validate output + validate(Accessor(_target), _reference, tolerance_fp16); + } + else + { + ARM_COMPUTE_TEST_INFO("Device does not support fp16 vector operations. Test SKIPPED."); + framework::ARM_COMPUTE_PRINT_INFO(); + } } FIXTURE_DATA_TEST_CASE(RunLarge, NEAbsLayerFixture, framework::DatasetMode::NIGHTLY, combine(datasets::LargeShapes(), framework::dataset::make("DataType", DataType::F16))) { - // Validate output - validate(Accessor(_target), _reference, tolerance_fp16); + if(CPUInfo::get().has_fp16()) + { + // Validate output + validate(Accessor(_target), _reference, tolerance_fp16); + } + else + { + ARM_COMPUTE_TEST_INFO("Device does not support fp16 vector operations. 
Test SKIPPED."); + framework::ARM_COMPUTE_PRINT_INFO(); + } } TEST_SUITE_END() // FP16 -#endif // __ARM_FEATURE_FP16_VECTOR_ARITHMETIC +#endif // ARM_COMPUTE_ENABLE_FP16 TEST_SUITE(FP32) FIXTURE_DATA_TEST_CASE(RunSmall, NEAbsLayerFixture, framework::DatasetMode::ALL, combine(datasets::SmallShapes(), framework::dataset::make("DataType", diff --git a/tests/validation/NEON/ElementwiseDivision.cpp b/tests/validation/NEON/ElementwiseDivision.cpp index 95db4ad5fd..6bd1e16642 100644 --- a/tests/validation/NEON/ElementwiseDivision.cpp +++ b/tests/validation/NEON/ElementwiseDivision.cpp @@ -49,11 +49,11 @@ AbsoluteTolerance tolerance_zero_s32(0); // Tolerance for S32 division const auto ElementwiseDivisionS32Dataset = combine(combine(framework::dataset::make("DataType", DataType::S32), framework::dataset::make("DataType", DataType::S32)), framework::dataset::make("DataType", DataType::S32)); -#ifdef __ARM_FEATURE_FP16_VECTOR_ARITHMETIC +#ifdef ARM_COMPUTE_ENABLE_FP16 RelativeTolerance tolerance_fp16(static_cast(0.01f)); const auto ElementwiseDivisionFP16Dataset = combine(combine(framework::dataset::make("DataType", DataType::F16), framework::dataset::make("DataType", DataType::F16)), framework::dataset::make("DataType", DataType::F16)); -#endif /* __ARM_FEATURE_FP16_VECTOR_ARITHMETIC */ +#endif /* ARM_COMPUTE_ENABLE_FP16 */ const auto ElementwiseDivisionFP32Dataset = combine(combine(framework::dataset::make("DataType", DataType::F32), framework::dataset::make("DataType", DataType::F32)), framework::dataset::make("DataType", DataType::F32)); const auto InPlaceDataSet = framework::dataset::make("InPlace", { false, true }); @@ -126,16 +126,24 @@ TEST_SUITE_END() // F32 TEST_SUITE_END() // DynamicShape TEST_SUITE(Float) -#ifdef __ARM_FEATURE_FP16_VECTOR_ARITHMETIC +#ifdef ARM_COMPUTE_ENABLE_FP16 TEST_SUITE(F16) FIXTURE_DATA_TEST_CASE(RunSmall, NEElementwiseDivisionFixture, framework::DatasetMode::ALL, combine(combine(datasets::SmallShapes(), ElementwiseDivisionFP16Dataset), 
InPlaceDataSet)) { - // Validate output - validate(Accessor(_target), _reference, tolerance_fp16, 0.01); + if(CPUInfo::get().has_fp16()) + { + // Validate output + validate(Accessor(_target), _reference, tolerance_fp16, 0.01); + } + else + { + ARM_COMPUTE_TEST_INFO("Device does not support fp16 vector operations. Test SKIPPED."); + framework::ARM_COMPUTE_PRINT_INFO(); + } } TEST_SUITE_END() // F16 -#endif /* __ARM_FEATURE_FP16_VECTOR_ARITHMETIC */ +#endif /* ARM_COMPUTE_ENABLE_FP16 */ TEST_SUITE(F32) FIXTURE_DATA_TEST_CASE(RunSmall, NEElementwiseDivisionFixture, framework::DatasetMode::ALL, combine(combine(datasets::SmallShapes(), ElementwiseDivisionFP32Dataset), diff --git a/tests/validation/NEON/ElementwiseExpLayer.cpp b/tests/validation/NEON/ElementwiseExpLayer.cpp index 31cd78626f..0f1ada14c3 100644 --- a/tests/validation/NEON/ElementwiseExpLayer.cpp +++ b/tests/validation/NEON/ElementwiseExpLayer.cpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2018-2021, 2023 Arm Limited. + * Copyright (c) 2018-2021, 2023-2024 Arm Limited. 
* * SPDX-License-Identifier: MIT * @@ -43,9 +43,9 @@ namespace validation namespace { RelativeTolerance tolerance_fp32(0.000001f); -#ifdef __ARM_FEATURE_FP16_VECTOR_ARITHMETIC +#ifdef ARM_COMPUTE_ENABLE_FP16 RelativeTolerance tolerance_fp16(0.01f); -#endif // __ARM_FEATURE_FP16_VECTOR_ARITHMETIC +#endif // ARM_COMPUTE_ENABLE_FP16 #if defined(__aarch64__) constexpr AbsoluteTolerance tolerance_qasymm8(0); @@ -66,23 +66,39 @@ template using NEExpLayerQuantizedFixture = ExpQuantizedValidationFixture; TEST_SUITE(Float) -#ifdef __ARM_FEATURE_FP16_VECTOR_ARITHMETIC +#ifdef ARM_COMPUTE_ENABLE_FP16 TEST_SUITE(FP16) FIXTURE_DATA_TEST_CASE(RunSmall, NEExpLayerFixture, framework::DatasetMode::PRECOMMIT, combine(datasets::SmallShapes(), framework::dataset::make("DataType", DataType::F16))) { - // Validate output - validate(Accessor(_target), _reference, tolerance_fp16); + if(CPUInfo::get().has_fp16()) + { + // Validate output + validate(Accessor(_target), _reference, tolerance_fp16); + } + else + { + ARM_COMPUTE_TEST_INFO("Device does not support fp16 vector operations. Test SKIPPED."); + framework::ARM_COMPUTE_PRINT_INFO(); + } } FIXTURE_DATA_TEST_CASE(RunLarge, NEExpLayerFixture, framework::DatasetMode::NIGHTLY, combine(datasets::LargeShapes(), framework::dataset::make("DataType", DataType::F16))) { - // Validate output - validate(Accessor(_target), _reference, tolerance_fp16); + if(CPUInfo::get().has_fp16()) + { + // Validate output + validate(Accessor(_target), _reference, tolerance_fp16); + } + else + { + ARM_COMPUTE_TEST_INFO("Device does not support fp16 vector operations. 
Test SKIPPED."); + framework::ARM_COMPUTE_PRINT_INFO(); + } } TEST_SUITE_END() // FP16 -#endif // __ARM_FEATURE_FP16_VECTOR_ARITHMETIC +#endif // ARM_COMPUTE_ENABLE_FP16 TEST_SUITE(FP32) FIXTURE_DATA_TEST_CASE(RunSmall, NEExpLayerFixture, framework::DatasetMode::ALL, combine(datasets::SmallShapes(), framework::dataset::make("DataType", diff --git a/tests/validation/NEON/ElementwiseLog.cpp b/tests/validation/NEON/ElementwiseLog.cpp index 1175903dac..67f4b8c16c 100644 --- a/tests/validation/NEON/ElementwiseLog.cpp +++ b/tests/validation/NEON/ElementwiseLog.cpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2019-2021, 2023 Arm Limited. + * Copyright (c) 2019-2021, 2023-2024 Arm Limited. * * SPDX-License-Identifier: MIT * @@ -43,9 +43,9 @@ namespace validation namespace { RelativeTolerance tolerance_fp32(0.000001f); -#ifdef __ARM_FEATURE_FP16_VECTOR_ARITHMETIC +#ifdef ARM_COMPUTE_ENABLE_FP16 RelativeTolerance tolerance_fp16(0.01f); -#endif // __ARM_FEATURE_FP16_VECTOR_ARITHMETIC +#endif // ARM_COMPUTE_ENABLE_FP16 #if defined(__aarch64__) constexpr AbsoluteTolerance tolerance_qasymm8(0); @@ -66,23 +66,39 @@ template using NELogLayerQuantizedFixture = LogQuantizedValidationFixture; TEST_SUITE(Float) -#ifdef __ARM_FEATURE_FP16_VECTOR_ARITHMETIC +#ifdef ARM_COMPUTE_ENABLE_FP16 TEST_SUITE(FP16) FIXTURE_DATA_TEST_CASE(RunSmall, NELogLayerFixture, framework::DatasetMode::PRECOMMIT, combine(datasets::SmallShapes(), framework::dataset::make("DataType", DataType::F16))) { - // Validate output - validate(Accessor(_target), _reference, tolerance_fp16); + if(CPUInfo::get().has_fp16()) + { + // Validate output + validate(Accessor(_target), _reference, tolerance_fp16); + } + else + { + ARM_COMPUTE_TEST_INFO("Device does not support fp16 vector operations. 
Test SKIPPED."); + framework::ARM_COMPUTE_PRINT_INFO(); + } } FIXTURE_DATA_TEST_CASE(RunLarge, NELogLayerFixture, framework::DatasetMode::NIGHTLY, combine(datasets::LargeShapes(), framework::dataset::make("DataType", DataType::F16))) { - // Validate output - validate(Accessor(_target), _reference, tolerance_fp16); + if(CPUInfo::get().has_fp16()) + { + // Validate output + validate(Accessor(_target), _reference, tolerance_fp16); + } + else + { + ARM_COMPUTE_TEST_INFO("Device does not support fp16 vector operations. Test SKIPPED."); + framework::ARM_COMPUTE_PRINT_INFO(); + } } TEST_SUITE_END() // FP16 -#endif // __ARM_FEATURE_FP16_VECTOR_ARITHMETIC +#endif // ARM_COMPUTE_ENABLE_FP16 TEST_SUITE(FP32) FIXTURE_DATA_TEST_CASE(RunSmall, NELogLayerFixture, framework::DatasetMode::ALL, combine(datasets::SmallShapes(), framework::dataset::make("DataType", diff --git a/tests/validation/NEON/ElementwiseMax.cpp b/tests/validation/NEON/ElementwiseMax.cpp index 61421ab3e5..97a10e0ed6 100644 --- a/tests/validation/NEON/ElementwiseMax.cpp +++ b/tests/validation/NEON/ElementwiseMax.cpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2018-2021 Arm Limited. + * Copyright (c) 2018-2021, 2024 Arm Limited. 
* * SPDX-License-Identifier: MIT * @@ -56,10 +56,10 @@ const auto ElementwiseMaxS32Dataset = combine(combine(framework::dataset::make(" DataType::S32)); const auto ElementwiseMaxS16Dataset = combine(combine(framework::dataset::make("DataType", { DataType::S16 }), framework::dataset::make("DataType", DataType::S16)), framework::dataset::make("DataType", DataType::S16)); -#ifdef __ARM_FEATURE_FP16_VECTOR_ARITHMETIC +#ifdef ARM_COMPUTE_ENABLE_FP16 const auto ElementwiseMaxFP16Dataset = combine(combine(framework::dataset::make("DataType", DataType::F16), framework::dataset::make("DataType", DataType::F16)), framework::dataset::make("DataType", DataType::F16)); -#endif /* __ARM_FEATURE_FP16_VECTOR_ARITHMETIC */ +#endif /* ARM_COMPUTE_ENABLE_FP16 */ const auto ElementwiseMaxFP32Dataset = combine(combine(framework::dataset::make("DataType", DataType::F32), framework::dataset::make("DataType", DataType::F32)), framework::dataset::make("DataType", DataType::F32)); const auto InPlaceDataSet = framework::dataset::make("InPlace", { false, true }); @@ -188,16 +188,24 @@ TEST_SUITE_END() TEST_SUITE_END() TEST_SUITE(Float) -#ifdef __ARM_FEATURE_FP16_VECTOR_ARITHMETIC +#ifdef ARM_COMPUTE_ENABLE_FP16 TEST_SUITE(F16) FIXTURE_DATA_TEST_CASE(RunSmall, NEElementwiseMaxFixture, framework::DatasetMode::ALL, combine(combine(datasets::SmallShapes(), ElementwiseMaxFP16Dataset), InPlaceDataSet)) { - // Validate output - validate(Accessor(_target), _reference); + if(CPUInfo::get().has_fp16()) + { + // Validate output + validate(Accessor(_target), _reference); + } + else + { + ARM_COMPUTE_TEST_INFO("Device does not support fp16 vector operations. 
Test SKIPPED."); + framework::ARM_COMPUTE_PRINT_INFO(); + } } TEST_SUITE_END() // F16 -#endif /* __ARM_FEATURE_FP16_VECTOR_ARITHMETIC */ +#endif /* ARM_COMPUTE_ENABLE_FP16 */ TEST_SUITE(F32) FIXTURE_DATA_TEST_CASE(RunSmall, NEElementwiseMaxFixture, framework::DatasetMode::ALL, combine(combine(datasets::SmallShapes(), ElementwiseMaxFP32Dataset), diff --git a/tests/validation/NEON/ElementwiseMin.cpp b/tests/validation/NEON/ElementwiseMin.cpp index a134eb354d..bf45544668 100644 --- a/tests/validation/NEON/ElementwiseMin.cpp +++ b/tests/validation/NEON/ElementwiseMin.cpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2018-2021 Arm Limited. + * Copyright (c) 2018-2021, 2024 Arm Limited. * * SPDX-License-Identifier: MIT * @@ -56,10 +56,10 @@ const auto ElementwiseMinS32Dataset = combine(combine(framework::dataset::make(" DataType::S32)); const auto ElementwiseMinS16Dataset = combine(combine(framework::dataset::make("DataType", { DataType::S16 }), framework::dataset::make("DataType", DataType::S16)), framework::dataset::make("DataType", DataType::S16)); -#ifdef __ARM_FEATURE_FP16_VECTOR_ARITHMETIC +#ifdef ARM_COMPUTE_ENABLE_FP16 const auto ElementwiseMinFP16Dataset = combine(combine(framework::dataset::make("DataType", DataType::F16), framework::dataset::make("DataType", DataType::F16)), framework::dataset::make("DataType", DataType::F16)); -#endif /* __ARM_FEATURE_FP16_VECTOR_ARITHMETIC */ +#endif /* ARM_COMPUTE_ENABLE_FP16 */ const auto ElementwiseMinFP32Dataset = combine(combine(framework::dataset::make("DataType", DataType::F32), framework::dataset::make("DataType", DataType::F32)), framework::dataset::make("DataType", DataType::F32)); const auto InPlaceDataSet = framework::dataset::make("InPlace", { false, true }); @@ -188,16 +188,24 @@ TEST_SUITE_END() TEST_SUITE_END() TEST_SUITE(Float) -#ifdef __ARM_FEATURE_FP16_VECTOR_ARITHMETIC +#ifdef ARM_COMPUTE_ENABLE_FP16 TEST_SUITE(F16) FIXTURE_DATA_TEST_CASE(RunSmall, NEElementwiseMinFixture, framework::DatasetMode::ALL, 
combine(combine(datasets::SmallShapes(), ElementwiseMinFP16Dataset), InPlaceDataSet)) { - // Validate output - validate(Accessor(_target), _reference); + if(CPUInfo::get().has_fp16()) + { + // Validate output + validate(Accessor(_target), _reference); + } + else + { + ARM_COMPUTE_TEST_INFO("Device does not support fp16 vector operations. Test SKIPPED."); + framework::ARM_COMPUTE_PRINT_INFO(); + } } TEST_SUITE_END() // F16 -#endif /* __ARM_FEATURE_FP16_VECTOR_ARITHMETIC */ +#endif /* ARM_COMPUTE_ENABLE_FP16 */ TEST_SUITE(F32) FIXTURE_DATA_TEST_CASE(RunSmall, NEElementwiseMinFixture, framework::DatasetMode::ALL, combine(combine(datasets::SmallShapes(), ElementwiseMinFP32Dataset), diff --git a/tests/validation/NEON/ElementwiseNegation.cpp b/tests/validation/NEON/ElementwiseNegation.cpp index 5b8ae8fc64..ab7025ff22 100644 --- a/tests/validation/NEON/ElementwiseNegation.cpp +++ b/tests/validation/NEON/ElementwiseNegation.cpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2019-2021, 2023 Arm Limited. + * Copyright (c) 2019-2021, 2023-2024 Arm Limited. 
* * SPDX-License-Identifier: MIT * @@ -43,9 +43,9 @@ namespace validation namespace { RelativeTolerance tolerance_fp32(0.000001f); -#ifdef __ARM_FEATURE_FP16_VECTOR_ARITHMETIC +#ifdef ARM_COMPUTE_ENABLE_FP16 RelativeTolerance tolerance_fp16(0.01f); -#endif // __ARM_FEATURE_FP16_VECTOR_ARITHMETIC +#endif // ARM_COMPUTE_ENABLE_FP16 #if defined(__aarch64__) constexpr AbsoluteTolerance tolerance_qasymm8(0); constexpr AbsoluteTolerance tolerance_qasymm8_signed(0); @@ -64,25 +64,41 @@ template using NENegLayerQuantizedFixture = NegQuantizedValidationFixture; TEST_SUITE(Float) -#ifdef __ARM_FEATURE_FP16_VECTOR_ARITHMETIC +#ifdef ARM_COMPUTE_ENABLE_FP16 TEST_SUITE(FP16) FIXTURE_DATA_TEST_CASE(RunSmall, NENegLayerFixture, framework::DatasetMode::PRECOMMIT, combine(combine(datasets::SmallShapes(), framework::dataset::make("DataType", DataType::F16)), framework::dataset::make("InPlace", { true, false }))) { - // Validate output - validate(Accessor(_target), _reference, tolerance_fp16); + if(CPUInfo::get().has_fp16()) + { + // Validate output + validate(Accessor(_target), _reference, tolerance_fp16); + } + else + { + ARM_COMPUTE_TEST_INFO("Device does not support fp16 vector operations. Test SKIPPED."); + framework::ARM_COMPUTE_PRINT_INFO(); + } } FIXTURE_DATA_TEST_CASE(RunLarge, NENegLayerFixture, framework::DatasetMode::NIGHTLY, combine(combine(datasets::LargeShapes(), framework::dataset::make("DataType", DataType::F16)), framework::dataset::make("InPlace", { false }))) { - // Validate output - validate(Accessor(_target), _reference, tolerance_fp16); + if(CPUInfo::get().has_fp16()) + { + // Validate output + validate(Accessor(_target), _reference, tolerance_fp16); + } + else + { + ARM_COMPUTE_TEST_INFO("Device does not support fp16 vector operations. 
Test SKIPPED."); + framework::ARM_COMPUTE_PRINT_INFO(); + } } TEST_SUITE_END() // FP16 -#endif // __ARM_FEATURE_FP16_VECTOR_ARITHMETIC +#endif // ARM_COMPUTE_ENABLE_FP16 TEST_SUITE(FP32) FIXTURE_DATA_TEST_CASE(RunSmall, NENegLayerFixture, framework::DatasetMode::ALL, combine(combine(datasets::SmallShapes(), diff --git a/tests/validation/NEON/ElementwisePower.cpp b/tests/validation/NEON/ElementwisePower.cpp index 9ac9eec280..c1fa48b95f 100644 --- a/tests/validation/NEON/ElementwisePower.cpp +++ b/tests/validation/NEON/ElementwisePower.cpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2019-2021 Arm Limited. + * Copyright (c) 2019-2021, 2024 Arm Limited. * * SPDX-License-Identifier: MIT * @@ -44,11 +44,11 @@ namespace { RelativeTolerance tolerance_fp32(0.001f); /** Input data sets **/ -#ifdef __ARM_FEATURE_FP16_VECTOR_ARITHMETIC +#ifdef ARM_COMPUTE_ENABLE_FP16 RelativeTolerance tolerance_fp16(static_cast(0.01f)); const auto ElementwisePowerFP16Dataset = combine(combine(framework::dataset::make("DataType", DataType::F16), framework::dataset::make("DataType", DataType::F16)), framework::dataset::make("DataType", DataType::F16)); -#endif /* __ARM_FEATURE_FP16_VECTOR_ARITHMETIC */ +#endif /* ARM_COMPUTE_ENABLE_FP16 */ const auto ElementwisePowerFP32Dataset = combine(combine(framework::dataset::make("DataType", DataType::F32), framework::dataset::make("DataType", DataType::F32)), framework::dataset::make("DataType", DataType::F32)); const auto InPlaceDataSet = framework::dataset::make("InPlace", { false, true }); @@ -91,16 +91,24 @@ DATA_TEST_CASE(Validate, framework::DatasetMode::ALL, zip(zip(zip( // *INDENT-ON* TEST_SUITE(Float) -#ifdef __ARM_FEATURE_FP16_VECTOR_ARITHMETIC +#ifdef ARM_COMPUTE_ENABLE_FP16 TEST_SUITE(F16) FIXTURE_DATA_TEST_CASE(RunSmall, NEElementwisePowerFixture, framework::DatasetMode::ALL, combine(combine(datasets::SmallShapes(), ElementwisePowerFP16Dataset), InPlaceDataSet)) { - // Validate output - validate(Accessor(_target), _reference, tolerance_fp16, 0.01); 
+ if(CPUInfo::get().has_fp16()) + { + // Validate output + validate(Accessor(_target), _reference, tolerance_fp16, 0.01); + } + else + { + ARM_COMPUTE_TEST_INFO("Device does not support fp16 vector operations. Test SKIPPED."); + framework::ARM_COMPUTE_PRINT_INFO(); + } } TEST_SUITE_END() // F16 -#endif /* __ARM_FEATURE_FP16_VECTOR_ARITHMETIC */ +#endif /* ARM_COMPUTE_ENABLE_FP16 */ TEST_SUITE(F32) diff --git a/tests/validation/NEON/ElementwiseRound.cpp b/tests/validation/NEON/ElementwiseRound.cpp index 620618cb0b..04c14ac430 100644 --- a/tests/validation/NEON/ElementwiseRound.cpp +++ b/tests/validation/NEON/ElementwiseRound.cpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2019-2021, 2023 Arm Limited. + * Copyright (c) 2019-2021, 2023-2024 Arm Limited. * * SPDX-License-Identifier: MIT * @@ -55,23 +55,39 @@ template using NERoundLayerQuantizedFixture = RoundQuantizedValidationFixture; TEST_SUITE(Float) -#ifdef __ARM_FEATURE_FP16_VECTOR_ARITHMETIC +#ifdef ARM_COMPUTE_ENABLE_FP16 TEST_SUITE(FP16) FIXTURE_DATA_TEST_CASE(RunSmall, NERoundLayerFixture, framework::DatasetMode::PRECOMMIT, combine(datasets::SmallShapes(), framework::dataset::make("DataType", DataType::F16))) { - // Validate output - validate(Accessor(_target), _reference); + if(CPUInfo::get().has_fp16()) + { + // Validate output + validate(Accessor(_target), _reference); + } + else + { + ARM_COMPUTE_TEST_INFO("Device does not support fp16 vector operations. Test SKIPPED."); + framework::ARM_COMPUTE_PRINT_INFO(); + } } FIXTURE_DATA_TEST_CASE(RunLarge, NERoundLayerFixture, framework::DatasetMode::NIGHTLY, combine(datasets::LargeShapes(), framework::dataset::make("DataType", DataType::F16))) { - // Validate output - validate(Accessor(_target), _reference); + if(CPUInfo::get().has_fp16()) + { + // Validate output + validate(Accessor(_target), _reference); + } + else + { + ARM_COMPUTE_TEST_INFO("Device does not support fp16 vector operations. 
Test SKIPPED."); + framework::ARM_COMPUTE_PRINT_INFO(); + } } TEST_SUITE_END() // FP16 -#endif // __ARM_FEATURE_FP16_VECTOR_ARITHMETIC +#endif // ARM_COMPUTE_ENABLE_FP16 TEST_SUITE(FP32) FIXTURE_DATA_TEST_CASE(RunSmall, NERoundLayerFixture, framework::DatasetMode::ALL, combine(datasets::SmallShapes(), framework::dataset::make("DataType", diff --git a/tests/validation/NEON/ElementwiseRsqrtLayer.cpp b/tests/validation/NEON/ElementwiseRsqrtLayer.cpp index 80788c893f..fa720d016f 100644 --- a/tests/validation/NEON/ElementwiseRsqrtLayer.cpp +++ b/tests/validation/NEON/ElementwiseRsqrtLayer.cpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2018-2021, 2023 Arm Limited. + * Copyright (c) 2018-2021, 2023-2024 Arm Limited. * * SPDX-License-Identifier: MIT * @@ -43,9 +43,9 @@ namespace validation namespace { RelativeTolerance tolerance_fp32(0.000001f); -#ifdef __ARM_FEATURE_FP16_VECTOR_ARITHMETIC +#ifdef ARM_COMPUTE_ENABLE_FP16 RelativeTolerance tolerance_fp16(0.01f); -#endif // __ARM_FEATURE_FP16_VECTOR_ARITHMETIC +#endif // ARM_COMPUTE_ENABLE_FP16 #if defined(__aarch64__) constexpr AbsoluteTolerance tolerance_qasymm8(0); constexpr AbsoluteTolerance tolerance_qasymm8_signed(0); @@ -83,23 +83,39 @@ template using NERsqrtLayerQuantizedFixture = RsqrtQuantizedValidationFixture; TEST_SUITE(Float) -#ifdef __ARM_FEATURE_FP16_VECTOR_ARITHMETIC +#ifdef ARM_COMPUTE_ENABLE_FP16 TEST_SUITE(FP16) FIXTURE_DATA_TEST_CASE(RunSmall, NERsqrtLayerFixture, framework::DatasetMode::PRECOMMIT, combine(datasets::SmallShapes(), framework::dataset::make("DataType", DataType::F16))) { - // Validate output - validate(Accessor(_target), _reference, tolerance_fp16); + if(CPUInfo::get().has_fp16()) + { + // Validate output + validate(Accessor(_target), _reference, tolerance_fp16); + } + else + { + ARM_COMPUTE_TEST_INFO("Device does not support fp16 vector operations. 
Test SKIPPED."); + framework::ARM_COMPUTE_PRINT_INFO(); + } } FIXTURE_DATA_TEST_CASE(RunLarge, NERsqrtLayerFixture, framework::DatasetMode::NIGHTLY, combine(datasets::LargeShapes(), framework::dataset::make("DataType", DataType::F16))) { - // Validate output - validate(Accessor(_target), _reference, tolerance_fp16); + if(CPUInfo::get().has_fp16()) + { + // Validate output + validate(Accessor(_target), _reference, tolerance_fp16); + } + else + { + ARM_COMPUTE_TEST_INFO("Device does not support fp16 vector operations. Test SKIPPED."); + framework::ARM_COMPUTE_PRINT_INFO(); + } } TEST_SUITE_END() // FP16 -#endif // __ARM_FEATURE_FP16_VECTOR_ARITHMETIC +#endif // ARM_COMPUTE_ENABLE_FP16 TEST_SUITE(FP32) FIXTURE_DATA_TEST_CASE(RunSmall, NERsqrtLayerFixture, framework::DatasetMode::ALL, combine(datasets::SmallShapes(), framework::dataset::make("DataType", diff --git a/tests/validation/NEON/ElementwiseSin.cpp b/tests/validation/NEON/ElementwiseSin.cpp index 9c2d7ae268..348157e4e3 100644 --- a/tests/validation/NEON/ElementwiseSin.cpp +++ b/tests/validation/NEON/ElementwiseSin.cpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2019-2021, 2023 Arm Limited. + * Copyright (c) 2019-2021, 2023-2024 Arm Limited. 
* * SPDX-License-Identifier: MIT * @@ -43,9 +43,9 @@ namespace validation namespace { AbsoluteTolerance tolerance_fp32(0.00001f); -#ifdef __ARM_FEATURE_FP16_VECTOR_ARITHMETIC +#ifdef ARM_COMPUTE_ENABLE_FP16 AbsoluteTolerance tolerance_fp16(0.0005f); -#endif // __ARM_FEATURE_FP16_VECTOR_ARITHMETIC +#endif // ARM_COMPUTE_ENABLE_FP16 constexpr AbsoluteTolerance tolerance_qasymm8(0); constexpr AbsoluteTolerance tolerance_qasymm8_signed(0); } // namespace @@ -59,23 +59,39 @@ template using NESinLayerQuantizedFixture = SinQuantizedValidationFixture; TEST_SUITE(Float) -#ifdef __ARM_FEATURE_FP16_VECTOR_ARITHMETIC +#ifdef ARM_COMPUTE_ENABLE_FP16 TEST_SUITE(FP16) FIXTURE_DATA_TEST_CASE(RunSmall, NESinLayerFixture, framework::DatasetMode::PRECOMMIT, combine(datasets::SmallShapes(), framework::dataset::make("DataType", DataType::F16))) { - // Validate output - validate(Accessor(_target), _reference, tolerance_fp16); + if(CPUInfo::get().has_fp16()) + { + // Validate output + validate(Accessor(_target), _reference, tolerance_fp16); + } + else + { + ARM_COMPUTE_TEST_INFO("Device does not support fp16 vector operations. Test SKIPPED."); + framework::ARM_COMPUTE_PRINT_INFO(); + } } FIXTURE_DATA_TEST_CASE(RunLarge, NESinLayerFixture, framework::DatasetMode::NIGHTLY, combine(datasets::LargeShapes(), framework::dataset::make("DataType", DataType::F16))) { - // Validate output - validate(Accessor(_target), _reference, tolerance_fp16); + if(CPUInfo::get().has_fp16()) + { + // Validate output + validate(Accessor(_target), _reference, tolerance_fp16); + } + else + { + ARM_COMPUTE_TEST_INFO("Device does not support fp16 vector operations. 
Test SKIPPED."); + framework::ARM_COMPUTE_PRINT_INFO(); + } } TEST_SUITE_END() // FP16 -#endif // __ARM_FEATURE_FP16_VECTOR_ARITHMETIC +#endif // ARM_COMPUTE_ENABLE_FP16 TEST_SUITE(FP32) FIXTURE_DATA_TEST_CASE(RunSmall, NESinLayerFixture, framework::DatasetMode::ALL, combine(datasets::SmallShapes(), framework::dataset::make("DataType", diff --git a/tests/validation/NEON/ElementwiseSquareDiff.cpp b/tests/validation/NEON/ElementwiseSquareDiff.cpp index 9a86b541de..3c303449ab 100644 --- a/tests/validation/NEON/ElementwiseSquareDiff.cpp +++ b/tests/validation/NEON/ElementwiseSquareDiff.cpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2018-2021 Arm Limited. + * Copyright (c) 2018-2021, 2024 Arm Limited. * * SPDX-License-Identifier: MIT * @@ -43,9 +43,9 @@ namespace validation namespace { RelativeTolerance tolerance_fp32(0.000001f); -#ifdef __ARM_FEATURE_FP16_VECTOR_ARITHMETIC +#ifdef ARM_COMPUTE_ENABLE_FP16 RelativeTolerance tolerance_fp16(0.01f); -#endif /* __ARM_FEATURE_FP16_VECTOR_ARITHMETIC */ +#endif /* ARM_COMPUTE_ENABLE_FP16 */ /** Input data sets **/ const auto ElementwiseSquaredDiffQASYMM8Dataset = combine(combine(framework::dataset::make("DataType", DataType::QASYMM8), framework::dataset::make("DataType", DataType::QASYMM8)), @@ -62,10 +62,10 @@ const auto ElementwiseSquaredDiffS32Dataset = combine(combine(framework::dataset DataType::S32)); const auto ElementwiseSquaredDiffS16Dataset = combine(combine(framework::dataset::make("DataType", { DataType::S16 }), framework::dataset::make("DataType", DataType::S16)), framework::dataset::make("DataType", DataType::S16)); -#ifdef __ARM_FEATURE_FP16_VECTOR_ARITHMETIC +#ifdef ARM_COMPUTE_ENABLE_FP16 const auto ElementwiseSquaredDiffFP16Dataset = combine(combine(framework::dataset::make("DataType", DataType::F16), framework::dataset::make("DataType", DataType::F16)), framework::dataset::make("DataType", DataType::F16)); -#endif /* __ARM_FEATURE_FP16_VECTOR_ARITHMETIC */ +#endif /* ARM_COMPUTE_ENABLE_FP16 */ const auto 
ElementwiseSquaredDiffFP32Dataset = combine(combine(framework::dataset::make("DataType", DataType::F32), framework::dataset::make("DataType", DataType::F32)), framework::dataset::make("DataType", DataType::F32)); const auto InPlaceDataSet = framework::dataset::make("InPlace", { false, true }); @@ -185,16 +185,24 @@ TEST_SUITE_END() TEST_SUITE_END() TEST_SUITE(Float) -#ifdef __ARM_FEATURE_FP16_VECTOR_ARITHMETIC +#ifdef ARM_COMPUTE_ENABLE_FP16 TEST_SUITE(F16) FIXTURE_DATA_TEST_CASE(RunSmall, NEElementwiseSquaredDiffFixture, framework::DatasetMode::ALL, combine(combine(datasets::SmallShapes(), ElementwiseSquaredDiffFP16Dataset), InPlaceDataSet)) { - // Validate output - validate(Accessor(_target), _reference, tolerance_fp16, 0.01); + if(CPUInfo::get().has_fp16()) + { + // Validate output + validate(Accessor(_target), _reference, tolerance_fp16, 0.01); + } + else + { + ARM_COMPUTE_TEST_INFO("Device does not support fp16 vector operations. Test SKIPPED."); + framework::ARM_COMPUTE_PRINT_INFO(); + } } TEST_SUITE_END() // F16 -#endif /* __ARM_FEATURE_FP16_VECTOR_ARITHMETIC */ +#endif /* ARM_COMPUTE_ENABLE_FP16 */ TEST_SUITE(F32) FIXTURE_DATA_TEST_CASE(RunSmall, NEElementwiseSquaredDiffFixture, framework::DatasetMode::ALL, combine(combine(datasets::SmallShapes(), ElementwiseSquaredDiffFP32Dataset), diff --git a/tests/validation/NEON/Flatten.cpp b/tests/validation/NEON/Flatten.cpp index 4ebd897c7a..266092ea64 100644 --- a/tests/validation/NEON/Flatten.cpp +++ b/tests/validation/NEON/Flatten.cpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2017-2021 Arm Limited. + * Copyright (c) 2017-2021, 2024 Arm Limited. 
* * SPDX-License-Identifier: MIT * @@ -81,22 +81,40 @@ FIXTURE_DATA_TEST_CASE(RunLarge, NEFlattenLayerFixture, framework::Datase } TEST_SUITE_END() // FP32 -#ifdef __ARM_FEATURE_FP16_VECTOR_ARITHMETIC +#ifdef ARM_COMPUTE_ENABLE_FP16 TEST_SUITE(FP16) FIXTURE_DATA_TEST_CASE(RunSmall, NEFlattenLayerFixture, framework::DatasetMode::ALL, combine(framework::dataset::concat(datasets::Small3DShapes(), datasets::Small4DShapes()), framework::dataset::make("DataType", DataType::F16))) { - // Validate output - validate(Accessor(_target), _reference); + // Only validate if the cpu architecture supports FP16. + if(CPUInfo::get().has_fp16()) + { + // Validate output + validate(Accessor(_target), _reference); + } + else + { + ARM_COMPUTE_TEST_INFO("Device does not support fp16 vector operations. Test SKIPPED."); + framework::ARM_COMPUTE_PRINT_INFO(); + } } FIXTURE_DATA_TEST_CASE(RunLarge, NEFlattenLayerFixture, framework::DatasetMode::NIGHTLY, combine(framework::dataset::concat(datasets::Large3DShapes(), datasets::Large4DShapes()), framework::dataset::make("DataType", DataType::F16))) { // Validate output - validate(Accessor(_target), _reference); + if(CPUInfo::get().has_fp16()) + { + // Validate output + validate(Accessor(_target), _reference); + } + else + { + ARM_COMPUTE_TEST_INFO("Device does not support fp16 vector operations. Test SKIPPED."); + framework::ARM_COMPUTE_PRINT_INFO(); + } } TEST_SUITE_END() // FP16 -#endif /* __ARM_FEATURE_FP16_VECTOR_ARITHMETIC */ +#endif /* ARM_COMPUTE_ENABLE_FP16 */ TEST_SUITE_END() // Float TEST_SUITE_END() // FlattenLayer diff --git a/tests/validation/NEON/Floor.cpp b/tests/validation/NEON/Floor.cpp index 3cd1033ef9..758f8aa4b3 100644 --- a/tests/validation/NEON/Floor.cpp +++ b/tests/validation/NEON/Floor.cpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2017-2020, 2022-2023 Arm Limited. + * Copyright (c) 2017-2020, 2022-2024 Arm Limited. 
* * SPDX-License-Identifier: MIT * @@ -96,20 +96,36 @@ template using NEFloorFixture = FloorValidationFixture; TEST_SUITE(Float) -#ifdef __ARM_FEATURE_FP16_VECTOR_ARITHMETIC +#ifdef ARM_COMPUTE_ENABLE_FP16 TEST_SUITE(FP16) FIXTURE_DATA_TEST_CASE(RunSmall, NEFloorFixture, framework::DatasetMode::PRECOMMIT, combine(datasets::SmallShapes(), framework::dataset::make("DataType", DataType::F16))) { - // Validate output - validate(Accessor(_target), _reference); + if(CPUInfo::get().has_fp16()) + { + // Validate output + validate(Accessor(_target), _reference); + } + else + { + ARM_COMPUTE_TEST_INFO("Device does not support fp16 vector operations. Test SKIPPED."); + framework::ARM_COMPUTE_PRINT_INFO(); + } } FIXTURE_DATA_TEST_CASE(RunLarge, NEFloorFixture, framework::DatasetMode::NIGHTLY, combine(datasets::LargeShapes(), framework::dataset::make("DataType", DataType::F16))) { - // Validate output - validate(Accessor(_target), _reference); + if(CPUInfo::get().has_fp16()) + { + // Validate output + validate(Accessor(_target), _reference); + } + else + { + ARM_COMPUTE_TEST_INFO("Device does not support fp16 vector operations. 
Test SKIPPED."); + framework::ARM_COMPUTE_PRINT_INFO(); + } } TEST_SUITE_END() // FP16 -#endif // __ARM_FEATURE_FP16_VECTOR_ARITHMETIC +#endif // ARM_COMPUTE_ENABLE_FP16 TEST_SUITE(FP32) FIXTURE_DATA_TEST_CASE(RunSmall, NEFloorFixture, framework::DatasetMode::PRECOMMIT, combine(datasets::SmallShapes(), framework::dataset::make("DataType", DataType::F32))) diff --git a/tests/validation/NEON/FullyConnectedLayer.cpp b/tests/validation/NEON/FullyConnectedLayer.cpp index ee7e56227d..d3a1cbd565 100644 --- a/tests/validation/NEON/FullyConnectedLayer.cpp +++ b/tests/validation/NEON/FullyConnectedLayer.cpp @@ -48,11 +48,11 @@ namespace /** Tolerance for float operations */ constexpr RelativeTolerance rel_tolerance_f32(0.01f); /**< Relative tolerance value for comparing reference's output against implementation's output for DataType::F32 */ constexpr AbsoluteTolerance abs_tolerance_f32(0.001f); /**< Absolute tolerance value for comparing reference's output against implementation's output for DataType::F32 */ -#ifdef __ARM_FEATURE_FP16_VECTOR_ARITHMETIC +#ifdef ARM_COMPUTE_ENABLE_FP16 const AbsoluteTolerance abs_tolerance_f16(0.3f); /**< Absolute tolerance value for comparing reference's output against implementation's output for DataType::F16 */ const RelativeTolerance rel_tolerance_f16(half_float::half(0.2f)); /**< Relative tolerance value for comparing reference's output against implementation's output for DataType::F16 */ constexpr float tolerance_num_f16 = 0.07f; /**< Tolerance number for FP16 */ -#endif /* __ARM_FEATURE_FP16_VECTOR_ARITHMETIC*/ +#endif /* ARM_COMPUTE_ENABLE_FP16*/ /** Tolerance for quantized asymmetric operations */ constexpr AbsoluteTolerance tolerance_qasymm8(1); @@ -61,9 +61,9 @@ constexpr AbsoluteTolerance tolerance_qasymm8_signed(1); /** CNN data types */ const auto CNNDataTypes = make("DataType", { -#ifdef __ARM_FEATURE_FP16_VECTOR_ARITHMETIC +#ifdef ARM_COMPUTE_ENABLE_FP16 DataType::F16, -#endif /* __ARM_FEATURE_FP16_VECTOR_ARITHMETIC */ +#endif 
/* ARM_COMPUTE_ENABLE_FP16 */ DataType::F32, }); @@ -304,15 +304,23 @@ template using NEFullyConnectedLayerDynamicBiasFixture = FullyConnectedWithDynamicBiasFixture; TEST_SUITE(Float) -#ifdef __ARM_FEATURE_FP16_VECTOR_ARITHMETIC +#ifdef ARM_COMPUTE_ENABLE_FP16 TEST_SUITE(FP16) FIXTURE_DATA_TEST_CASE(RunSmall, NEFullyConnectedLayerFixture, framework::DatasetMode::PRECOMMIT, combine(datasets::SmallFullyConnectedLayerDataset(), FullyConnectedParameters, make("DataType", DataType::F16), NoActivationFunctionDataset)) { - // Validate output - validate(Accessor(_target), _reference, rel_tolerance_f16, tolerance_num_f16, abs_tolerance_f16); + if(CPUInfo::get().has_fp16()) + { + // Validate output + validate(Accessor(_target), _reference, rel_tolerance_f16, tolerance_num_f16, abs_tolerance_f16); + } + else + { + ARM_COMPUTE_TEST_INFO("Device does not support fp16 vector operations. Test SKIPPED."); + framework::ARM_COMPUTE_PRINT_INFO(); + } } FIXTURE_DATA_TEST_CASE(RunWithActivation, NEFullyConnectedLayerFixture, framework::DatasetMode::PRECOMMIT, combine(datasets::FullyConnectedLayerWithActivationDataset(), @@ -320,16 +328,32 @@ FIXTURE_DATA_TEST_CASE(RunWithActivation, NEFullyConnectedLayerFixture, fr make("DataType", DataType::F16), ActivationFunctionsDataset)) { - // Validate output - validate(Accessor(_target), _reference, rel_tolerance_f16, tolerance_num_f16, abs_tolerance_f16); + if(CPUInfo::get().has_fp16()) + { + // Validate output + validate(Accessor(_target), _reference, rel_tolerance_f16, tolerance_num_f16, abs_tolerance_f16); + } + else + { + ARM_COMPUTE_TEST_INFO("Device does not support fp16 vector operations. 
Test SKIPPED."); + framework::ARM_COMPUTE_PRINT_INFO(); + } } FIXTURE_DATA_TEST_CASE(RunLarge, NEFullyConnectedLayerFixture, framework::DatasetMode::NIGHTLY, combine(datasets::LargeFullyConnectedLayerDataset(), FullyConnectedParameters, make("DataType", DataType::F16), NoActivationFunctionDataset)) { - // Validate output - validate(Accessor(_target), _reference, rel_tolerance_f16, tolerance_num_f16, abs_tolerance_f16); + if(CPUInfo::get().has_fp16()) + { + // Validate output + validate(Accessor(_target), _reference, rel_tolerance_f16, tolerance_num_f16, abs_tolerance_f16); + } + else + { + ARM_COMPUTE_TEST_INFO("Device does not support fp16 vector operations. Test SKIPPED."); + framework::ARM_COMPUTE_PRINT_INFO(); + } } FIXTURE_DATA_TEST_CASE(RunDynamicWeights, NEFullyConnectedLayerDynamicWeightsFixture, framework::DatasetMode::PRECOMMIT, combine(datasets::SmallFullyConnectedLayerDataset(), make("DataType", DataType::F16), @@ -338,7 +362,7 @@ FIXTURE_DATA_TEST_CASE(RunDynamicWeights, NEFullyConnectedLayerDynamicWeightsFix { } TEST_SUITE_END() -#endif /* __ARM_FEATURE_FP16_VECTOR_ARITHMETIC */ +#endif /* ARM_COMPUTE_ENABLE_FP16 */ TEST_SUITE(FP32) FIXTURE_DATA_TEST_CASE(RunSmall, NEFullyConnectedLayerFixture, framework::DatasetMode::PRECOMMIT, combine(datasets::SmallFullyConnectedLayerDataset(), FullyConnectedParameters, diff --git a/tests/validation/NEON/FuseBatchNormalization.cpp b/tests/validation/NEON/FuseBatchNormalization.cpp index 62265c6ac9..375e604281 100644 --- a/tests/validation/NEON/FuseBatchNormalization.cpp +++ b/tests/validation/NEON/FuseBatchNormalization.cpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2019-2021 Arm Limited. + * Copyright (c) 2019-2021, 2024 Arm Limited. 
* * SPDX-License-Identifier: MIT * @@ -39,9 +39,9 @@ namespace validation namespace { AbsoluteTolerance absolute_tolerance_f32(0.001f); -#ifdef __ARM_FEATURE_FP16_VECTOR_ARITHMETIC +#ifdef ARM_COMPUTE_ENABLE_FP16 AbsoluteTolerance absolute_tolerance_f16(0.2f); -#endif /* __ARM_FEATURE_FP16_VECTOR_ARITHMETIC */ +#endif /* ARM_COMPUTE_ENABLE_FP16 */ } // namespace template @@ -108,7 +108,7 @@ FIXTURE_DATA_TEST_CASE(RunLarge, NEFuseBatchNormalizationConvFixture, fra validate(Accessor(_target_b), _reference_b, absolute_tolerance_f32); } TEST_SUITE_END() // FP32 -#ifdef __ARM_FEATURE_FP16_VECTOR_ARITHMETIC +#ifdef ARM_COMPUTE_ENABLE_FP16 TEST_SUITE(FP16) FIXTURE_DATA_TEST_CASE(RunSmall, NEFuseBatchNormalizationConvFixture, framework::DatasetMode::PRECOMMIT, combine(combine(combine(combine(combine(combine( @@ -120,9 +120,17 @@ FIXTURE_DATA_TEST_CASE(RunSmall, NEFuseBatchNormalizationConvFixture, fram with_gamma_values), with_beta_values)) { - // Validate outputs - validate(Accessor(_target_w), _reference_w, absolute_tolerance_f16); - validate(Accessor(_target_b), _reference_b, absolute_tolerance_f16); + if(CPUInfo::get().has_fp16()) + { + // Validate output + validate(Accessor(_target_w), _reference_w, absolute_tolerance_f16); + validate(Accessor(_target_b), _reference_b, absolute_tolerance_f16); + } + else + { + ARM_COMPUTE_TEST_INFO("Device does not support fp16 vector operations. 
Test SKIPPED."); + framework::ARM_COMPUTE_PRINT_INFO(); + } } FIXTURE_DATA_TEST_CASE(RunLarge, NEFuseBatchNormalizationConvFixture, framework::DatasetMode::NIGHTLY, @@ -135,12 +143,20 @@ FIXTURE_DATA_TEST_CASE(RunLarge, NEFuseBatchNormalizationConvFixture, fram with_gamma_values), with_beta_values)) { - // Validate outputs - validate(Accessor(_target_w), _reference_w, absolute_tolerance_f16); - validate(Accessor(_target_b), _reference_b, absolute_tolerance_f16); + if(CPUInfo::get().has_fp16()) + { + // Validate output + validate(Accessor(_target_w), _reference_w, absolute_tolerance_f16); + validate(Accessor(_target_b), _reference_b, absolute_tolerance_f16); + } + else + { + ARM_COMPUTE_TEST_INFO("Device does not support fp16 vector operations. Test SKIPPED."); + framework::ARM_COMPUTE_PRINT_INFO(); + } } TEST_SUITE_END() // FP16 -#endif /* __ARM_FEATURE_FP16_VECTOR_ARITHMETIC */ +#endif /* ARM_COMPUTE_ENABLE_FP16 */ TEST_SUITE_END() // Float TEST_SUITE_END() // Convolution TEST_SUITE(DepthwiseConvolution) @@ -177,7 +193,7 @@ FIXTURE_DATA_TEST_CASE(RunLarge, NEFuseBatchNormalizationDWCFixture, fram } TEST_SUITE_END() // FP32 -#ifdef __ARM_FEATURE_FP16_VECTOR_ARITHMETIC +#ifdef ARM_COMPUTE_ENABLE_FP16 TEST_SUITE(FP16) FIXTURE_DATA_TEST_CASE(RunSmall, NEFuseBatchNormalizationDWCFixture, framework::DatasetMode::PRECOMMIT, combine(combine(combine(combine(combine(combine( @@ -189,9 +205,17 @@ FIXTURE_DATA_TEST_CASE(RunSmall, NEFuseBatchNormalizationDWCFixture, frame with_gamma_values), with_beta_values)) { - // Validate outputs - validate(Accessor(_target_w), _reference_w, absolute_tolerance_f16); - validate(Accessor(_target_b), _reference_b, absolute_tolerance_f16); + if(CPUInfo::get().has_fp16()) + { + // Validate output + validate(Accessor(_target_w), _reference_w, absolute_tolerance_f16); + validate(Accessor(_target_b), _reference_b, absolute_tolerance_f16); + } + else + { + ARM_COMPUTE_TEST_INFO("Device does not support fp16 vector operations. 
Test SKIPPED."); + framework::ARM_COMPUTE_PRINT_INFO(); + } } FIXTURE_DATA_TEST_CASE(RunLarge, NEFuseBatchNormalizationDWCFixture, framework::DatasetMode::NIGHTLY, @@ -204,13 +228,21 @@ FIXTURE_DATA_TEST_CASE(RunLarge, NEFuseBatchNormalizationDWCFixture, frame with_gamma_values), with_beta_values)) { - // Validate outputs - validate(Accessor(_target_w), _reference_w, absolute_tolerance_f16); - validate(Accessor(_target_b), _reference_b, absolute_tolerance_f16); + if(CPUInfo::get().has_fp16()) + { + // Validate output + validate(Accessor(_target_w), _reference_w, absolute_tolerance_f16); + validate(Accessor(_target_b), _reference_b, absolute_tolerance_f16); + } + else + { + ARM_COMPUTE_TEST_INFO("Device does not support fp16 vector operations. Test SKIPPED."); + framework::ARM_COMPUTE_PRINT_INFO(); + } } TEST_SUITE_END() // FP16 -#endif /* __ARM_FEATURE_FP16_VECTOR_ARITHMETIC */ +#endif /* ARM_COMPUTE_ENABLE_FP16 */ TEST_SUITE_END() // Float TEST_SUITE_END() // DepthwiseConvolution TEST_SUITE_END() // FuseBatchNormalization diff --git a/tests/validation/NEON/GEMM.cpp b/tests/validation/NEON/GEMM.cpp index 5f6a402204..bf74873385 100644 --- a/tests/validation/NEON/GEMM.cpp +++ b/tests/validation/NEON/GEMM.cpp @@ -56,17 +56,17 @@ using framework::dataset::make; namespace { constexpr AbsoluteTolerance tolerance_f(0.001f); /**< Tolerance value for comparing reference's output against implementation's output for FP32 data types */ -#ifdef __ARM_FEATURE_FP16_VECTOR_ARITHMETIC +#ifdef ARM_COMPUTE_ENABLE_FP16 RelativeTolerance rel_tolerance_f16(half(0.2)); /**< Relative tolerance value for comparing reference's output against implementation's output for FP16 data types */ const AbsoluteTolerance abs_tolerance_f16(0.2f); /**< Absolute tolerance value for comparing reference's output against implementation's output for FP16 data types */ constexpr float tolerance_num = 0.07f; /**< Tolerance number for FP16 data types */ -#endif /* __ARM_FEATURE_FP16_VECTOR_ARITHMETIC */ 
+#endif /* ARM_COMPUTE_ENABLE_FP16 */ /** CNN data types */ const auto CNNDataTypes = make("DataType", { -#ifdef __ARM_FEATURE_FP16_VECTOR_ARITHMETIC +#ifdef ARM_COMPUTE_ENABLE_FP16 DataType::F16, -#endif /* __ARM_FEATURE_FP16_VECTOR_ARITHMETIC */ +#endif /* ARM_COMPUTE_ENABLE_FP16 */ DataType::F32, }); @@ -398,21 +398,38 @@ DATA_TEST_CASE(ValidateAccumulate, framework::DatasetMode::ALL, combine( ARM_COMPUTE_EXPECT((expected == bool(status)), framework::LogLevel::ERRORS); } -#ifdef __ARM_FEATURE_FP16_VECTOR_ARITHMETIC +#ifdef ARM_COMPUTE_ENABLE_FP16 TEST_SUITE(FP16) FIXTURE_DATA_TEST_CASE(RunSmall, NEGEMMFixture, framework::DatasetMode::PRECOMMIT, combine(combine(datasets::SmallGEMMDataset(), make("ReshapeWeights", { true, false })), make("DataType", DataType::F16))) { - // Validate output - validate(Accessor(_target), _reference, rel_tolerance_f16, tolerance_num, abs_tolerance_f16); + if(CPUInfo::get().has_fp16()) + { + // Validate output + validate(Accessor(_target), _reference, rel_tolerance_f16, tolerance_num, abs_tolerance_f16); + } + else + { + ARM_COMPUTE_TEST_INFO("Device does not support fp16 vector operations. Test SKIPPED."); + framework::ARM_COMPUTE_PRINT_INFO(); + } + } FIXTURE_DATA_TEST_CASE(RunLarge, NEGEMMFixture, framework::DatasetMode::NIGHTLY, combine(combine(datasets::LargeGEMMDataset(), make("ReshapeWeights", { true, false })), make("DataType", DataType::F16))) { - // Validate output - validate(Accessor(_target), _reference, rel_tolerance_f16, tolerance_num, abs_tolerance_f16); + if(CPUInfo::get().has_fp16()) + { + // Validate output + validate(Accessor(_target), _reference, rel_tolerance_f16, tolerance_num, abs_tolerance_f16); + } + else + { + ARM_COMPUTE_TEST_INFO("Device does not support fp16 vector operations. 
Test SKIPPED."); + framework::ARM_COMPUTE_PRINT_INFO(); + } } TEST_SUITE(BATCHED_MATMUL) @@ -420,13 +437,21 @@ FIXTURE_DATA_TEST_CASE(RunSmall, NEBatchedMatMulFixture, framework::Datase make("ReshapeWeights", { false })), make("DataType", DataType::F16))) { - // Validate output - validate(Accessor(_target), _reference, rel_tolerance_f16, tolerance_num, abs_tolerance_f16); + if(CPUInfo::get().has_fp16()) + { + // Validate output + validate(Accessor(_target), _reference, rel_tolerance_f16, tolerance_num, abs_tolerance_f16); + } + else + { + ARM_COMPUTE_TEST_INFO("Device does not support fp16 vector operations. Test SKIPPED."); + framework::ARM_COMPUTE_PRINT_INFO(); + } } TEST_SUITE_END() // BATCHED_MATMUL TEST_SUITE_END() // FP16 -#endif /* __ARM_FEATURE_FP16_VECTOR_ARITHMETIC */ +#endif /* ARM_COMPUTE_ENABLE_FP16 */ TEST_SUITE(FP32) FIXTURE_DATA_TEST_CASE(RunSmall, NEGEMMFixture, framework::DatasetMode::PRECOMMIT, combine(combine(datasets::SmallGEMMDataset(), diff --git a/tests/validation/NEON/GenerateProposalsLayer.cpp b/tests/validation/NEON/GenerateProposalsLayer.cpp index 960c2054e7..7ad40abeaf 100644 --- a/tests/validation/NEON/GenerateProposalsLayer.cpp +++ b/tests/validation/NEON/GenerateProposalsLayer.cpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2019-2021 Arm Limited. + * Copyright (c) 2019-2021, 2024 Arm Limited. 
* * SPDX-License-Identifier: MIT * @@ -385,16 +385,24 @@ FIXTURE_DATA_TEST_CASE(ComputeAllAnchors, NEComputeAllAnchorsFixture, fra validate(Accessor(_target), _reference); } TEST_SUITE_END() // FP32 -#ifdef __ARM_FEATURE_FP16_VECTOR_ARITHMETIC +#ifdef ARM_COMPUTE_ENABLE_FP16 TEST_SUITE(FP16) FIXTURE_DATA_TEST_CASE(ComputeAllAnchors, NEComputeAllAnchorsFixture, framework::DatasetMode::ALL, combine(combine(framework::dataset::make("NumAnchors", { 2, 4, 8 }), ComputeAllInfoDataset), framework::dataset::make("DataType", { DataType::F16 }))) { - // Validate output - validate(Accessor(_target), _reference); + if(CPUInfo::get().has_fp16()) + { + // Validate output + validate(Accessor(_target), _reference); + } + else + { + ARM_COMPUTE_TEST_INFO("Device does not support fp16 vector operations. Test SKIPPED."); + framework::ARM_COMPUTE_PRINT_INFO(); + } } TEST_SUITE_END() // FP16 -#endif // __ARM_FEATURE_FP16_VECTOR_ARITHMETIC +#endif // ARM_COMPUTE_ENABLE_FP16 TEST_SUITE_END() // Float diff --git a/tests/validation/NEON/Im2Col.cpp b/tests/validation/NEON/Im2Col.cpp index ef5e75c5db..ccd001ec01 100644 --- a/tests/validation/NEON/Im2Col.cpp +++ b/tests/validation/NEON/Im2Col.cpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2017-2021 Arm Limited. + * Copyright (c) 2017-2021, 2024 Arm Limited. * * SPDX-License-Identifier: MIT * @@ -104,25 +104,41 @@ FIXTURE_DATA_TEST_CASE(RunLarge, CpuIm2ColFixture, framework::DatasetMode } TEST_SUITE_END() // FP32 -#ifdef __ARM_FEATURE_FP16_VECTOR_ARITHMETIC +#ifdef ARM_COMPUTE_ENABLE_FP16 TEST_SUITE(FP16) FIXTURE_DATA_TEST_CASE(RunSmall, CpuIm2ColFixture, framework::DatasetMode::PRECOMMIT, combine(combine(im2col_shapes, framework::dataset::make("DataType", DataType::F16)), conv_args_small)) { - // Validate output - validate(Accessor(_target), _reference); + if(CPUInfo::get().has_fp16()) + { + // Validate output + validate(Accessor(_target), _reference); + } + else + { + ARM_COMPUTE_TEST_INFO("Device does not support fp16 vector operations. 
Test SKIPPED."); + framework::ARM_COMPUTE_PRINT_INFO(); + } } FIXTURE_DATA_TEST_CASE(RunLarge, CpuIm2ColFixture, framework::DatasetMode::NIGHTLY, combine(combine(concat(im2col_shapes, datasets::LargeShapes()), framework::dataset::make("DataType", DataType::F16)), conv_args)) { - // Validate output - validate(Accessor(_target), _reference); + if(CPUInfo::get().has_fp16()) + { + // Validate output + validate(Accessor(_target), _reference); + } + else + { + ARM_COMPUTE_TEST_INFO("Device does not support fp16 vector operations. Test SKIPPED."); + framework::ARM_COMPUTE_PRINT_INFO(); + } } TEST_SUITE_END() // FP16 -#endif /* __ARM_FEATURE_FP16_VECTOR_ARITHMETIC */ +#endif /* ARM_COMPUTE_ENABLE_FP16 */ TEST_SUITE_END() // Float diff --git a/tests/validation/NEON/InstanceNormalizationLayer.cpp b/tests/validation/NEON/InstanceNormalizationLayer.cpp index 593ef0ad99..b349717673 100644 --- a/tests/validation/NEON/InstanceNormalizationLayer.cpp +++ b/tests/validation/NEON/InstanceNormalizationLayer.cpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2019-2021 Arm Limited. + * Copyright (c) 2019-2021, 2024 Arm Limited. * * SPDX-License-Identifier: MIT * @@ -44,13 +44,13 @@ namespace { /** Tolerance for float operations */ AbsoluteTolerance tolerance_f32(0.0015f); -#ifdef __ARM_FEATURE_FP16_VECTOR_ARITHMETIC +#ifdef ARM_COMPUTE_ENABLE_FP16 // This precision is chosen based on the precision float16_t can provide // for the decimal numbers between 16 and 32 and decided based on multiple // times of execution of tests. Although, with randomly generated numbers // there is no gaurantee that this tolerance will be always large enough. 
AbsoluteTolerance tolerance_f16(static_cast(0.015625f)); -#endif // __ARM_FEATURE_FP16_VECTOR_ARITHMETIC +#endif // ARM_COMPUTE_ENABLE_FP16 } // namespace TEST_SUITE(NEON) @@ -108,7 +108,7 @@ FIXTURE_DATA_TEST_CASE(RunSmall, NEInstanceNormalizationLayerFixture, fra TEST_SUITE_END() // FP32 -#ifdef __ARM_FEATURE_FP16_VECTOR_ARITHMETIC +#ifdef ARM_COMPUTE_ENABLE_FP16 TEST_SUITE(FP16) FIXTURE_DATA_TEST_CASE(RunSmall, NEInstanceNormalizationLayerFixture, framework::DatasetMode::PRECOMMIT, combine(combine(combine(datasets::SmallShapes(), @@ -116,11 +116,19 @@ FIXTURE_DATA_TEST_CASE(RunSmall, NEInstanceNormalizationLayerFixture, fram framework::dataset::make("DataLayout", { DataLayout::NCHW, DataLayout::NHWC })), framework::dataset::make("InPlace", { false, true }))) { - // Validate output - validate(Accessor(_target), _reference, tolerance_f16); + if(CPUInfo::get().has_fp16()) + { + // Validate output + validate(Accessor(_target), _reference, tolerance_f16); + } + else + { + ARM_COMPUTE_TEST_INFO("Device does not support fp16 vector operations. Test SKIPPED."); + framework::ARM_COMPUTE_PRINT_INFO(); + } } TEST_SUITE_END() // FP16 -#endif // __ARM_FEATURE_FP16_VECTOR_ARITHMETIC +#endif // ARM_COMPUTE_ENABLE_FP16 TEST_SUITE_END() // InstanceNormalizationLayer TEST_SUITE_END() // Neon diff --git a/tests/validation/NEON/L2NormalizeLayer.cpp b/tests/validation/NEON/L2NormalizeLayer.cpp index 56223c44d5..dfa1544894 100644 --- a/tests/validation/NEON/L2NormalizeLayer.cpp +++ b/tests/validation/NEON/L2NormalizeLayer.cpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2017-2021 Arm Limited. + * Copyright (c) 2017-2021, 2024 Arm Limited. 
* * SPDX-License-Identifier: MIT * @@ -44,9 +44,9 @@ namespace { /** Tolerance for float operations */ RelativeTolerance tolerance_f32(0.00001f); -#ifdef __ARM_FEATURE_FP16_VECTOR_ARITHMETIC +#ifdef ARM_COMPUTE_ENABLE_FP16 RelativeTolerance tolerance_f16(0.2f); -#endif // __ARM_FEATURE_FP16_VECTOR_ARITHMETIC +#endif // ARM_COMPUTE_ENABLE_FP16 } // namespace TEST_SUITE(NEON) @@ -116,15 +116,23 @@ FIXTURE_DATA_TEST_CASE(RunLarge, NEL2NormalizeLayerFixture, framework::Da } TEST_SUITE_END() // FP32 -#ifdef __ARM_FEATURE_FP16_VECTOR_ARITHMETIC +#ifdef ARM_COMPUTE_ENABLE_FP16 TEST_SUITE(FP16) FIXTURE_DATA_TEST_CASE(RunSmall, NEL2NormalizeLayerFixture, framework::DatasetMode::PRECOMMIT, combine(combine(combine(combine(datasets::SmallShapes(), framework::dataset::make("DataType", DataType::F16)), framework::dataset::make("DataLayout", { DataLayout::NCHW, DataLayout::NHWC })), framework::dataset::make("Axis", { -1, 0, 1, 2 })), framework::dataset::make("Epsilon", { 1e-6 }))) { - // Validate output - validate(Accessor(_target), _reference, tolerance_f16); + if(CPUInfo::get().has_fp16()) + { + // Validate output + validate(Accessor(_target), _reference, tolerance_f16); + } + else + { + ARM_COMPUTE_TEST_INFO("Device does not support fp16 vector operations. Test SKIPPED."); + framework::ARM_COMPUTE_PRINT_INFO(); + } } FIXTURE_DATA_TEST_CASE(RunLarge, NEL2NormalizeLayerFixture, framework::DatasetMode::NIGHTLY, @@ -132,11 +140,19 @@ FIXTURE_DATA_TEST_CASE(RunLarge, NEL2NormalizeLayerFixture, framework::Dat framework::dataset::make("Axis", { -1, 0, 2 })), framework::dataset::make("Epsilon", { 1e-6 }))) { - // Validate output - validate(Accessor(_target), _reference, tolerance_f16); + if(CPUInfo::get().has_fp16()) + { + // Validate output + validate(Accessor(_target), _reference, tolerance_f16); + } + else + { + ARM_COMPUTE_TEST_INFO("Device does not support fp16 vector operations. 
Test SKIPPED."); + framework::ARM_COMPUTE_PRINT_INFO(); + } } TEST_SUITE_END() // FP16 -#endif // __ARM_FEATURE_FP16_VECTOR_ARITHMETIC +#endif // ARM_COMPUTE_ENABLE_FP16 TEST_SUITE_END() // L2NormalizeLayer TEST_SUITE_END() // Neon diff --git a/tests/validation/NEON/LSTMLayer.cpp b/tests/validation/NEON/LSTMLayer.cpp index c4645563bf..037307b6f5 100644 --- a/tests/validation/NEON/LSTMLayer.cpp +++ b/tests/validation/NEON/LSTMLayer.cpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2018-2021 Arm Limited. + * Copyright (c) 2018-2021, 2024 Arm Limited. * * SPDX-License-Identifier: MIT * @@ -43,95 +43,107 @@ RelativeTolerance tolerance_f32(0.00001f); RelativeTolerance tolerance_f16(half(0.1)); } // namespace +using framework::dataset::make; + TEST_SUITE(NEON) TEST_SUITE(LSTMLayer) // *INDENT-OFF* // clang-format off -DATA_TEST_CASE(Validate, framework::DatasetMode::ALL, zip(zip(zip(zip(zip(zip(zip(zip(zip( - framework::dataset::make("InputInfo", { TensorInfo(TensorShape(8U, 2U), 1, DataType::U8), // Wrong data type - TensorInfo(TensorShape(27U, 13U, 2U), 1, DataType::F32), // Wrong input size - TensorInfo(TensorShape(8U, 2U), 1, DataType::F32), // Wrong input weights size - TensorInfo(TensorShape(8U, 2U), 1, DataType::F32), // Wrong recurrent weights size - TensorInfo(TensorShape(8U, 2U), 1, DataType::F32), // Wrong cell bias size - TensorInfo(TensorShape(8U, 2U), 1, DataType::F32), // Wrong cell state size - TensorInfo(TensorShape(8U, 2U), 1, DataType::F32), // Wrong output size - TensorInfo(TensorShape(8U, 2U), 1, DataType::F32), // Wrong scratch size - }), - framework::dataset::make("InputWeightsInfo", { TensorInfo(TensorShape(8U, 16U), 1, DataType::F32), - TensorInfo(TensorShape(8U, 16U), 1, DataType::F32), - TensorInfo(TensorShape(27U, 11U, 2U), 1, DataType::F32), - TensorInfo(TensorShape(8U, 16U), 1, DataType::F32), - TensorInfo(TensorShape(8U, 16U), 1, DataType::F32), - TensorInfo(TensorShape(8U, 16U), 1, DataType::F32), - TensorInfo(TensorShape(8U, 16U), 1, 
DataType::F32), - TensorInfo(TensorShape(8U, 16U), 1, DataType::F32), - })), - framework::dataset::make("RecurrentWeightsInfo", { TensorInfo(TensorShape(16U, 16U), 1, DataType::F32), - TensorInfo(TensorShape(16U, 16U), 1, DataType::F32), - TensorInfo(TensorShape(16U, 16U), 1, DataType::F32), - TensorInfo(TensorShape(25U, 11U, 2U), 1, DataType::F32), - TensorInfo(TensorShape(16U, 16U), 1, DataType::F32), - TensorInfo(TensorShape(16U, 16U), 1, DataType::F32), - TensorInfo(TensorShape(16U, 16U), 1, DataType::F32), - TensorInfo(TensorShape(16U, 16U), 1, DataType::F32), - })), - framework::dataset::make("CellBiasInfo", { TensorInfo(TensorShape(16U), 1, DataType::F32), - TensorInfo(TensorShape(16U), 1, DataType::F32), - TensorInfo(TensorShape(16U), 1, DataType::F32), - TensorInfo(TensorShape(16U), 1, DataType::F32), - TensorInfo(TensorShape(30U), 1, DataType::F32), - TensorInfo(TensorShape(16U), 1, DataType::F32), - TensorInfo(TensorShape(16U), 1, DataType::F32), - TensorInfo(TensorShape(16U), 1, DataType::F32), - })), - framework::dataset::make("ProjectionBiasInfo", { TensorInfo(TensorShape(16U), 1, DataType::F32), - TensorInfo(TensorShape(16U), 1, DataType::F32), - TensorInfo(TensorShape(16U), 1, DataType::F32), - TensorInfo(TensorShape(16U), 1, DataType::F32), - TensorInfo(TensorShape(16U), 1, DataType::F32), - TensorInfo(TensorShape(16U), 1, DataType::F32), - TensorInfo(TensorShape(16U), 1, DataType::F32), - TensorInfo(TensorShape(16U), 1, DataType::F32), - })), - framework::dataset::make("CellStateInfo", { TensorInfo(TensorShape(16U, 2U), 1, DataType::F32), - TensorInfo(TensorShape(16U, 2U), 1, DataType::F32), - TensorInfo(TensorShape(16U, 2U), 1, DataType::F32), - TensorInfo(TensorShape(16U, 2U), 1, DataType::F32), - TensorInfo(TensorShape(16U, 2U), 1, DataType::F32), - TensorInfo(TensorShape(11U), 1, DataType::F32), - TensorInfo(TensorShape(16U, 2U), 1, DataType::F32), - TensorInfo(TensorShape(16U, 2U), 1, DataType::F32), - })), - 
framework::dataset::make("OutputInfo", { TensorInfo(TensorShape(16U, 2U), 1, DataType::F32), - TensorInfo(TensorShape(16U, 2U), 1, DataType::F32), - TensorInfo(TensorShape(16U, 2U), 1, DataType::F32), - TensorInfo(TensorShape(16U, 2U), 1, DataType::F32), - TensorInfo(TensorShape(16U, 2U), 1, DataType::F32), - TensorInfo(TensorShape(16U, 2U), 1, DataType::F32), - TensorInfo(TensorShape(11U, 2U), 1, DataType::F32), - TensorInfo(TensorShape(16U, 2U), 1, DataType::F32), - })), - framework::dataset::make("ScratchInfo", { TensorInfo(TensorShape(64U, 2U), 1, DataType::F32), - TensorInfo(TensorShape(64U, 2U), 1, DataType::F32), - TensorInfo(TensorShape(64U, 2U), 1, DataType::F32), - TensorInfo(TensorShape(64U, 2U), 1, DataType::F32), - TensorInfo(TensorShape(64U, 2U), 1, DataType::F32), - TensorInfo(TensorShape(64U, 2U), 1, DataType::F32), - TensorInfo(TensorShape(64U, 2U), 1, DataType::F32), - TensorInfo(TensorShape(12U, 2U), 1, DataType::F32), - })), - framework::dataset::make("ActivationInfo", { ActivationLayerInfo(ActivationLayerInfo::ActivationFunction::RELU), - ActivationLayerInfo(ActivationLayerInfo::ActivationFunction::RELU), - ActivationLayerInfo(ActivationLayerInfo::ActivationFunction::RELU), - ActivationLayerInfo(ActivationLayerInfo::ActivationFunction::RELU), - ActivationLayerInfo(ActivationLayerInfo::ActivationFunction::RELU), - ActivationLayerInfo(ActivationLayerInfo::ActivationFunction::RELU), - ActivationLayerInfo(ActivationLayerInfo::ActivationFunction::RELU), - ActivationLayerInfo(ActivationLayerInfo::ActivationFunction::RELU), - })), - framework::dataset::make("Expected", { false, false, false, false, false, false, false, false })), - input_info, input_weights_info, recurrent_weights_info, cell_bias_info, projection_bias_info, cell_state_info, output_info, scratch_info, info, expected) +DATA_TEST_CASE(Validate, framework::DatasetMode::ALL, zip( + make("InputInfo", { + TensorInfo(TensorShape(8U, 2U), 1, DataType::U8), // Wrong data type + 
TensorInfo(TensorShape(27U, 13U, 2U), 1, DataType::F32), // Wrong input size + TensorInfo(TensorShape(8U, 2U), 1, DataType::F32), // Wrong input weights size + TensorInfo(TensorShape(8U, 2U), 1, DataType::F32), // Wrong recurrent weights size + TensorInfo(TensorShape(8U, 2U), 1, DataType::F32), // Wrong cell bias size + TensorInfo(TensorShape(8U, 2U), 1, DataType::F32), // Wrong cell state size + TensorInfo(TensorShape(8U, 2U), 1, DataType::F32), // Wrong output size + TensorInfo(TensorShape(8U, 2U), 1, DataType::F32), // Wrong scratch size + }), + make("InputWeightsInfo", { + TensorInfo(TensorShape(8U, 16U), 1, DataType::F32), + TensorInfo(TensorShape(8U, 16U), 1, DataType::F32), + TensorInfo(TensorShape(27U, 11U, 2U), 1, DataType::F32), + TensorInfo(TensorShape(8U, 16U), 1, DataType::F32), + TensorInfo(TensorShape(8U, 16U), 1, DataType::F32), + TensorInfo(TensorShape(8U, 16U), 1, DataType::F32), + TensorInfo(TensorShape(8U, 16U), 1, DataType::F32), + TensorInfo(TensorShape(8U, 16U), 1, DataType::F32), + }), + make("RecurrentWeightsInfo", { + TensorInfo(TensorShape(16U, 16U), 1, DataType::F32), + TensorInfo(TensorShape(16U, 16U), 1, DataType::F32), + TensorInfo(TensorShape(16U, 16U), 1, DataType::F32), + TensorInfo(TensorShape(25U, 11U, 2U), 1, DataType::F32), + TensorInfo(TensorShape(16U, 16U), 1, DataType::F32), + TensorInfo(TensorShape(16U, 16U), 1, DataType::F32), + TensorInfo(TensorShape(16U, 16U), 1, DataType::F32), + TensorInfo(TensorShape(16U, 16U), 1, DataType::F32), + }), + make("CellBiasInfo", { + TensorInfo(TensorShape(16U), 1, DataType::F32), + TensorInfo(TensorShape(16U), 1, DataType::F32), + TensorInfo(TensorShape(16U), 1, DataType::F32), + TensorInfo(TensorShape(16U), 1, DataType::F32), + TensorInfo(TensorShape(30U), 1, DataType::F32), + TensorInfo(TensorShape(16U), 1, DataType::F32), + TensorInfo(TensorShape(16U), 1, DataType::F32), + TensorInfo(TensorShape(16U), 1, DataType::F32), + }), + make("ProjectionBiasInfo", { + 
TensorInfo(TensorShape(16U), 1, DataType::F32), + TensorInfo(TensorShape(16U), 1, DataType::F32), + TensorInfo(TensorShape(16U), 1, DataType::F32), + TensorInfo(TensorShape(16U), 1, DataType::F32), + TensorInfo(TensorShape(16U), 1, DataType::F32), + TensorInfo(TensorShape(16U), 1, DataType::F32), + TensorInfo(TensorShape(16U), 1, DataType::F32), + TensorInfo(TensorShape(16U), 1, DataType::F32), + }), + make("CellStateInfo", { + TensorInfo(TensorShape(16U, 2U), 1, DataType::F32), + TensorInfo(TensorShape(16U, 2U), 1, DataType::F32), + TensorInfo(TensorShape(16U, 2U), 1, DataType::F32), + TensorInfo(TensorShape(16U, 2U), 1, DataType::F32), + TensorInfo(TensorShape(16U, 2U), 1, DataType::F32), + TensorInfo(TensorShape(11U), 1, DataType::F32), + TensorInfo(TensorShape(16U, 2U), 1, DataType::F32), + TensorInfo(TensorShape(16U, 2U), 1, DataType::F32), + }), + make("OutputInfo", { + TensorInfo(TensorShape(16U, 2U), 1, DataType::F32), + TensorInfo(TensorShape(16U, 2U), 1, DataType::F32), + TensorInfo(TensorShape(16U, 2U), 1, DataType::F32), + TensorInfo(TensorShape(16U, 2U), 1, DataType::F32), + TensorInfo(TensorShape(16U, 2U), 1, DataType::F32), + TensorInfo(TensorShape(16U, 2U), 1, DataType::F32), + TensorInfo(TensorShape(11U, 2U), 1, DataType::F32), + TensorInfo(TensorShape(16U, 2U), 1, DataType::F32), + }), + make("ScratchInfo", { + TensorInfo(TensorShape(64U, 2U), 1, DataType::F32), + TensorInfo(TensorShape(64U, 2U), 1, DataType::F32), + TensorInfo(TensorShape(64U, 2U), 1, DataType::F32), + TensorInfo(TensorShape(64U, 2U), 1, DataType::F32), + TensorInfo(TensorShape(64U, 2U), 1, DataType::F32), + TensorInfo(TensorShape(64U, 2U), 1, DataType::F32), + TensorInfo(TensorShape(64U, 2U), 1, DataType::F32), + TensorInfo(TensorShape(12U, 2U), 1, DataType::F32), + }), + make("ActivationInfo", { + ActivationLayerInfo(ActivationLayerInfo::ActivationFunction::RELU), + ActivationLayerInfo(ActivationLayerInfo::ActivationFunction::RELU), + 
ActivationLayerInfo(ActivationLayerInfo::ActivationFunction::RELU), + ActivationLayerInfo(ActivationLayerInfo::ActivationFunction::RELU), + ActivationLayerInfo(ActivationLayerInfo::ActivationFunction::RELU), + ActivationLayerInfo(ActivationLayerInfo::ActivationFunction::RELU), + ActivationLayerInfo(ActivationLayerInfo::ActivationFunction::RELU), + ActivationLayerInfo(ActivationLayerInfo::ActivationFunction::RELU), + }), + make("Expected", { false, false, false, false, false, false, false, false })), + input_info, input_weights_info, recurrent_weights_info, cell_bias_info, + projection_bias_info, cell_state_info, output_info, scratch_info, info, expected) { LSTMParams lstm_params_info; auto cell_bias_clone = cell_bias_info.clone(); @@ -154,11 +166,14 @@ template using NELSTMLayerFixture = LSTMLayerValidationFixture, T>; TEST_SUITE(FP32) -FIXTURE_DATA_TEST_CASE(RunSmall, NELSTMLayerFixture, framework::DatasetMode::ALL, combine(combine(combine(combine(datasets::SmallLSTMLayerDataset(), framework::dataset::make("DataType", - DataType::F32)), - framework::dataset::make("ProjectionOpt", { true, false })), - framework::dataset::make("PeepholeOpt", { true, false })), - framework::dataset::make("UseLayerNorm", { true, false }))) +FIXTURE_DATA_TEST_CASE(RunSmall, NELSTMLayerFixture, framework::DatasetMode::ALL, + combine( + datasets::SmallLSTMLayerDataset(), + make("DataType", DataType::F32), + make("ProjectionOpt", { true, false }), + make("PeepholeOpt", { true, false }), + make("UseLayerNorm", { true, false }), + make("UseMemoryManager", { true, false }))) { // Validate output validate(Accessor(_target), _reference, tolerance_f32); @@ -166,20 +181,32 @@ FIXTURE_DATA_TEST_CASE(RunSmall, NELSTMLayerFixture, framework::DatasetMo } TEST_SUITE_END() // FP32 -#ifdef __ARM_FEATURE_FP16_VECTOR_ARITHMETIC +#ifdef ARM_COMPUTE_ENABLE_FP16 TEST_SUITE(FP16) -FIXTURE_DATA_TEST_CASE(RunSmall, NELSTMLayerFixture, framework::DatasetMode::ALL, 
combine(combine(combine(combine(datasets::SmallLSTMLayerDataset(), framework::dataset::make("DataType", - DataType::F16)), - framework::dataset::make("ProjectionOpt", { true, false })), - framework::dataset::make("PeepholeOpt", { true, false })), - framework::dataset::make("UseLayerNorm", { true, false }))) +FIXTURE_DATA_TEST_CASE(RunSmall, NELSTMLayerFixture, framework::DatasetMode::ALL, + combine( + datasets::SmallLSTMLayerDataset(), + make("DataType", DataType::F16), + make("ProjectionOpt", { true, false }), + make("PeepholeOpt", { true, false }), + make("UseLayerNorm", { true, false }), + make("UseMemoryManager", { true, false }))) { - // Validate output - validate(Accessor(_target), _reference, tolerance_f16); - validate(Accessor(_target_scratch), _reference_scratch, tolerance_f16); + if(CPUInfo::get().has_fp16()) + { + // Validate output + validate(Accessor(_target), _reference, tolerance_f16); + validate(Accessor(_target_scratch), _reference_scratch, tolerance_f16); + } + else + { + ARM_COMPUTE_TEST_INFO("Device does not support fp16 vector operations. Test SKIPPED."); + framework::ARM_COMPUTE_PRINT_INFO(); + } } TEST_SUITE_END() // FP16 -#endif /* __ARM_FEATURE_FP16_VECTOR_ARITHMETIC */ +#endif /* ARM_COMPUTE_ENABLE_FP16 */ + TEST_SUITE_END() // LSTMLayer TEST_SUITE_END() // Neon } // namespace validation diff --git a/tests/validation/NEON/LSTMLayerQuantized.cpp b/tests/validation/NEON/LSTMLayerQuantized.cpp index 6b98ee2b67..7e5e6fbedf 100644 --- a/tests/validation/NEON/LSTMLayerQuantized.cpp +++ b/tests/validation/NEON/LSTMLayerQuantized.cpp @@ -21,15 +21,17 @@ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE * SOFTWARE. 
*/ +#include "arm_compute/runtime/Allocator.h" +#include "arm_compute/runtime/BlobLifetimeManager.h" #include "arm_compute/runtime/NEON/functions/NELSTMLayerQuantized.h" +#include "arm_compute/runtime/MemoryManagerOnDemand.h" +#include "arm_compute/runtime/PoolManager.h" #include "tests/NEON/Accessor.h" -#include "tests/PaddingCalculator.h" #include "tests/Utils.h" -#include "tests/datasets/LSTMLayerDataset.h" #include "tests/framework/Asserts.h" +#include "tests/framework/DatasetModes.h" #include "tests/framework/Macros.h" -#include "tests/framework/datasets/Datasets.h" #include "tests/validation/Validation.h" #include @@ -214,6 +216,154 @@ TEST_CASE(RunSmall, framework::DatasetMode::PRECOMMIT) validate(Accessor(output_state), expected_output, tolerance_qsymm16); } +TEST_CASE(RunSmallWithMemoryManager, framework::DatasetMode::PRECOMMIT) +{ + const int batch_size = 2; + const int input_size = 2; + const int output_size = 4; + + + QuantizationInfo qasymm(1.f / 128.f, 128); + QuantizationInfo qweights(1.f / 128.f, 128); + QuantizationInfo qsymm_3(8.f / 32768.f, 0); + QuantizationInfo qsymm_4(16.f / 32768.f, 0); + + TensorShape input_shape{ input_size, batch_size }; + TensorShape input_weights_shape{ input_size, output_size }; + TensorShape recurrent_weights_shape{ output_size, output_size }; + TensorShape output_shape{ output_size, batch_size}; + TensorShape bias_shape{ output_size }; + + auto input_to_input_weights = create_tensor(input_weights_shape, DataType::QASYMM8, 1, qweights); + auto input_to_forget_weights = create_tensor(input_weights_shape, DataType::QASYMM8, 1, qweights); + auto input_to_cell_weights = create_tensor(input_weights_shape, DataType::QASYMM8, 1, qweights); + auto input_to_output_weights = create_tensor(input_weights_shape, DataType::QASYMM8, 1, qweights); + auto recurrent_to_input_weights = create_tensor(recurrent_weights_shape, DataType::QASYMM8, 1, qweights); + auto recurrent_to_forget_weights = create_tensor(recurrent_weights_shape, 
DataType::QASYMM8, 1, qweights); + auto recurrent_to_cell_weights = create_tensor(recurrent_weights_shape, DataType::QASYMM8, 1, qweights); + auto recurrent_to_output_weights = create_tensor(recurrent_weights_shape, DataType::QASYMM8, 1, qweights); + auto input_gate_bias = create_tensor(bias_shape, DataType::S32); + auto forget_gate_bias = create_tensor(bias_shape, DataType::S32); + auto cell_gate_bias = create_tensor(bias_shape, DataType::S32); + auto output_gate_bias = create_tensor(bias_shape, DataType::S32); + + // LSTM input + auto input = create_tensor(input_shape, DataType::QASYMM8, 1, qasymm); + + // LSTM output state + auto output_state = create_tensor(output_shape, DataType::QASYMM8, 1, qasymm); + + // LSTM cell state + auto cell_state = create_tensor(output_shape, DataType::QSYMM16, 1, qsymm_4); + + auto lifetime_mgr = std::make_shared(); + auto pool_mgr = std::make_shared(); + auto mm = std::make_shared(lifetime_mgr, pool_mgr); + + NELSTMLayerQuantized lstmq(mm); + + lstmq.configure(&input, &input_to_input_weights, &input_to_forget_weights, &input_to_cell_weights, &input_to_output_weights, + &recurrent_to_input_weights, &recurrent_to_forget_weights, &recurrent_to_cell_weights, &recurrent_to_output_weights, + &input_gate_bias, &forget_gate_bias, &cell_gate_bias, &output_gate_bias, &cell_state, &output_state, &cell_state, &output_state); + + input.allocator()->allocate(); + input_to_input_weights.allocator()->allocate(); + input_to_forget_weights.allocator()->allocate(); + input_to_cell_weights.allocator()->allocate(); + input_to_output_weights.allocator()->allocate(); + recurrent_to_input_weights.allocator()->allocate(); + recurrent_to_forget_weights.allocator()->allocate(); + recurrent_to_cell_weights.allocator()->allocate(); + recurrent_to_output_weights.allocator()->allocate(); + input_gate_bias.allocator()->allocate(); + forget_gate_bias.allocator()->allocate(); + cell_gate_bias.allocator()->allocate(); + output_gate_bias.allocator()->allocate(); + 
cell_state.allocator()->allocate(); + output_state.allocator()->allocate(); + + // Fill weights and biases + fill_tensor(input_to_input_weights, std::vector{ 47, 168, + 66, 239, + 6, 42, + 237, 236 }); + + fill_tensor(input_to_forget_weights, std::vector { 204, 193, + 148, 59, + 113, 17, + 66, 197 }); + + fill_tensor(input_to_cell_weights, std::vector { 172, 101, + 184, 209, + 165, 82, + 108, 209 }); + + fill_tensor(input_to_output_weights, std::vector { 203, 244, + 219, 114, + 130, 16, + 163, 222 }); + + fill_tensor(recurrent_to_input_weights, std::vector { 162, 168, 7, 95, + 91, 155, 108, 216, + 255, 100, 48, 188, + 58, 37, 186, 147 }); + + fill_tensor(recurrent_to_forget_weights, std::vector { 46, 58, 47, 170, + 246, 96, 12, 99, + 68, 23, 186, 161, + 237, 164, 89, 6 }); + + fill_tensor(recurrent_to_cell_weights, std::vector { 234, 99, 71, 206, + 205, 159, 64, 253, + 191, 148, 116, 8, + 209, 136, 59, 138 }); + + fill_tensor(recurrent_to_output_weights, std::vector { 23, 241, 137, 36, + 206, 5, 227, 56, + 254, 176, 231, 47, + 18, 201, 161, 11 }); + + fill_tensor(input_gate_bias, std::vector {-103038, 30525, 115255, -38154 }); + fill_tensor(forget_gate_bias, std::vector { -23428, 126970, 116806, 46307 }); + fill_tensor(cell_gate_bias, std::vector { 128006, 69949, -42808, 42568 }); + fill_tensor(output_gate_bias, std::vector { -67066, -53607, 47233, 7300 }); + + SimpleTensor expected_output(output_shape, DataType::QASYMM8, 1, qasymm); + + // Initialize state + fill_tensor(output_state, std::vector { 128, 128, 128, 128, + 128, 128, 128, 128 }); + fill_tensor(cell_state, std::vector { 0, 0, 0, 0, + 0, 0, 0, 0 }); + + // First input + fill_tensor(input, std::vector { 106, 193, + 155, 150 }); + + fill_tensor(expected_output, std::vector { 128, 130, 36, 134, + 128, 131, 35, 133 }); + + Allocator alloc{}; + mm->populate(alloc, 1); + + lstmq.run(); + validate(Accessor(output_state), expected_output, tolerance_qsymm16); + + // Second input + fill_tensor(expected_output, 
std::vector { 128, 129, 12, 137, + 128, 131, 10, 136 }); + lstmq.run(); + validate(Accessor(output_state), expected_output, tolerance_qsymm16); + + // Third input + fill_tensor(expected_output, std::vector { 128, 129, 8, 140, + 128, 130, 6, 138 }); + lstmq.run(); + validate(Accessor(output_state), expected_output, tolerance_qsymm16); + + mm->clear(); +} + TEST_CASE(RunLarge, framework::DatasetMode::PRECOMMIT) { const int batch_size = 16; diff --git a/tests/validation/NEON/LogSoftmaxLayer.cpp b/tests/validation/NEON/LogSoftmaxLayer.cpp index a7ab033359..6718597c6b 100644 --- a/tests/validation/NEON/LogSoftmaxLayer.cpp +++ b/tests/validation/NEON/LogSoftmaxLayer.cpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2019-2020 Arm Limited. + * Copyright (c) 2019-2020, 2024 Arm Limited. * * SPDX-License-Identifier: MIT * @@ -52,9 +52,9 @@ constexpr AbsoluteTolerance tolerance_qasymm8(1); /** CNN data types */ const auto CNNDataTypes = framework::dataset::make("DataType", { -#ifdef __ARM_FEATURE_FP16_VECTOR_ARITHMETIC +#ifdef ARM_COMPUTE_ENABLE_FP16 DataType::F16, -#endif /* __ARM_FEATURE_FP16_VECTOR_ARITHMETIC */ +#endif /* ARM_COMPUTE_ENABLE_FP16 */ DataType::F32, }); } // namespace @@ -66,34 +66,58 @@ template using NELogSoftmaxLayerFixture = SoftmaxValidationFixture; TEST_SUITE(Float) -#ifdef __ARM_FEATURE_FP16_VECTOR_ARITHMETIC +#ifdef ARM_COMPUTE_ENABLE_FP16 TEST_SUITE(FP16) FIXTURE_DATA_TEST_CASE(RunSmall, NELogSoftmaxLayerFixture, framework::DatasetMode::PRECOMMIT, combine(combine(combine(datasets::Small4DShapes(), framework::dataset::make("DataType", DataType::F16)), framework::dataset::make("Beta", { 1.0f, 2.0f })), framework::dataset::make("Axis", { 0, -1 }))) { - // Validate output - validate(Accessor(_target), _reference, tolerance_f16); + if(CPUInfo::get().has_fp16()) + { + // Validate output + validate(Accessor(_target), _reference, tolerance_f16); + } + else + { + ARM_COMPUTE_TEST_INFO("Device does not support fp16 vector operations. 
Test SKIPPED."); + framework::ARM_COMPUTE_PRINT_INFO(); + } } FIXTURE_DATA_TEST_CASE(RunSmall4D, NELogSoftmaxLayerFixture, framework::DatasetMode::PRECOMMIT, combine(combine(combine(datasets::Small4DShapes(), framework::dataset::make("DataType", DataType::F16)), framework::dataset::make("Beta", { 1.0f, 2.0f })), framework::dataset::make("Axis", { 0, -3, 2 }))) { - // Validate output - validate(Accessor(_target), _reference, tolerance_f16); + if(CPUInfo::get().has_fp16()) + { + // Validate output + validate(Accessor(_target), _reference, tolerance_f16); + } + else + { + ARM_COMPUTE_TEST_INFO("Device does not support fp16 vector operations. Test SKIPPED."); + framework::ARM_COMPUTE_PRINT_INFO(); + } } FIXTURE_DATA_TEST_CASE(RunLarge, NELogSoftmaxLayerFixture, framework::DatasetMode::NIGHTLY, combine(combine(combine(datasets::SoftmaxLayerLargeShapes(), framework::dataset::make("DataType", DataType::F16)), framework::dataset::make("Beta", { 1.0f, 2.0f })), framework::dataset::make("Axis", { 0 }))) { - // Validate output - validate(Accessor(_target), _reference, tolerance_f16); + if(CPUInfo::get().has_fp16()) + { + // Validate output + validate(Accessor(_target), _reference, tolerance_f16); + } + else + { + ARM_COMPUTE_TEST_INFO("Device does not support fp16 vector operations. 
Test SKIPPED."); + framework::ARM_COMPUTE_PRINT_INFO(); + } } TEST_SUITE_END() //FP16 -#endif /* __ARM_FEATURE_FP16_VECTOR_ARITHMETIC */ +#endif /* ARM_COMPUTE_ENABLE_FP16 */ TEST_SUITE(FP32) FIXTURE_DATA_TEST_CASE(RunSmall2D, NELogSoftmaxLayerFixture, framework::DatasetMode::PRECOMMIT, combine(combine(combine(datasets::SoftmaxLayerSmallShapes(), diff --git a/tests/validation/NEON/MatMul.cpp b/tests/validation/NEON/MatMul.cpp index f22bd9e86a..ef79faba51 100644 --- a/tests/validation/NEON/MatMul.cpp +++ b/tests/validation/NEON/MatMul.cpp @@ -264,7 +264,7 @@ FIXTURE_DATA_TEST_CASE(RunLarge, TEST_SUITE_END() // BF16 #endif /* ARM_COMPUTE_ENABLE_BF16 */ -#ifdef __ARM_FEATURE_FP16_VECTOR_ARITHMETIC +#ifdef ARM_COMPUTE_ENABLE_FP16 TEST_SUITE(FP16) FIXTURE_DATA_TEST_CASE(RunSmall, NEMatMulFixture, @@ -279,8 +279,16 @@ FIXTURE_DATA_TEST_CASE(RunSmall, ActivationLayerInfo(ActivationLayerInfo::ActivationFunction::RELU) }))) { - // Validate output - validate(Accessor(_target), _reference, tolerance_fp16); + if(CPUInfo::get().has_fp16()) + { + // Validate output + validate(Accessor(_target), _reference, tolerance_fp16); + } + else + { + ARM_COMPUTE_TEST_INFO("Device does not support fp16 vector operations. Test SKIPPED."); + framework::ARM_COMPUTE_PRINT_INFO(); + } } FIXTURE_DATA_TEST_CASE(RunLarge, NEMatMulFixture, @@ -295,8 +303,16 @@ FIXTURE_DATA_TEST_CASE(RunLarge, ActivationLayerInfo(ActivationLayerInfo::ActivationFunction::RELU) }))) { - // Validate output - validate(Accessor(_target), _reference, tolerance_fp16); + if(CPUInfo::get().has_fp16()) + { + // Validate output + validate(Accessor(_target), _reference, tolerance_fp16); + } + else + { + ARM_COMPUTE_TEST_INFO("Device does not support fp16 vector operations. 
Test SKIPPED."); + framework::ARM_COMPUTE_PRINT_INFO(); + } } FIXTURE_DATA_TEST_CASE(RunStressDynamicTensors, NEMatMulDynamicTensorsFixture, @@ -312,11 +328,19 @@ FIXTURE_DATA_TEST_CASE(RunStressDynamicTensors, }), make("NumberOfRuns", 5))) { - // Validate output - validate(Accessor(_target), _reference, tolerance_fp16); + if(CPUInfo::get().has_fp16()) + { + // Validate output + validate(Accessor(_target), _reference, tolerance_fp16); + } + else + { + ARM_COMPUTE_TEST_INFO("Device does not support fp16 vector operations. Test SKIPPED."); + framework::ARM_COMPUTE_PRINT_INFO(); + } } TEST_SUITE_END() // FP16 -#endif /* __ARM_FEATURE_FP16_VECTOR_ARITHMETIC */ +#endif /* ARM_COMPUTE_ENABLE_FP16 */ TEST_SUITE_END() // Float diff --git a/tests/validation/NEON/MaxUnpoolingLayer.cpp b/tests/validation/NEON/MaxUnpoolingLayer.cpp index 0eb021fe71..ac1fde28c9 100644 --- a/tests/validation/NEON/MaxUnpoolingLayer.cpp +++ b/tests/validation/NEON/MaxUnpoolingLayer.cpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2020-2023 Arm Limited. + * Copyright (c) 2020-2024 Arm Limited. * * SPDX-License-Identifier: MIT * @@ -62,7 +62,7 @@ FIXTURE_DATA_TEST_CASE(MaxUnpooling, NEMaxUnpoolingLayerFixture, framewor validate(Accessor(_target), _reference); } TEST_SUITE_END() // FP32 -#ifdef __ARM_FEATURE_FP16_VECTOR_ARITHMETIC +#ifdef ARM_COMPUTE_ENABLE_FP16 TEST_SUITE(FP16) FIXTURE_DATA_TEST_CASE(MaxUnpooling, NEMaxUnpoolingLayerFixture, framework::DatasetMode::PRECOMMIT, combine(combine(datasets::SmallNoneUnitShapes(), combine(PoolingLayerIndicesDatasetFPSmall, framework::dataset::make("DataType", DataType::F16))), @@ -70,11 +70,19 @@ FIXTURE_DATA_TEST_CASE(MaxUnpooling, NEMaxUnpoolingLayerFixture, framework )) { - // Validate output - validate(Accessor(_target), _reference); + if(CPUInfo::get().has_fp16()) + { + // Validate output + validate(Accessor(_target), _reference); + } + else + { + ARM_COMPUTE_TEST_INFO("Device does not support fp16 vector operations. 
Test SKIPPED."); + framework::ARM_COMPUTE_PRINT_INFO(); + } } TEST_SUITE_END() // FP16 -#endif /* __ARM_FEATURE_FP16_VECTOR_ARITHMETIC */ +#endif /* ARM_COMPUTE_ENABLE_FP16 */ TEST_SUITE_END() // Float diff --git a/tests/validation/NEON/MeanStdDevNormalizationLayer.cpp b/tests/validation/NEON/MeanStdDevNormalizationLayer.cpp index 085f3608a0..02ce06a843 100644 --- a/tests/validation/NEON/MeanStdDevNormalizationLayer.cpp +++ b/tests/validation/NEON/MeanStdDevNormalizationLayer.cpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2019-2022 Arm Limited. + * Copyright (c) 2019-2022, 2024 Arm Limited. * * SPDX-License-Identifier: MIT * @@ -44,10 +44,10 @@ namespace validation namespace { /** Tolerance for float operations */ -#ifdef __ARM_FEATURE_FP16_VECTOR_ARITHMETIC +#ifdef ARM_COMPUTE_ENABLE_FP16 RelativeTolerance tolerance_f16(half(0.2f)); -#endif /* __ARM_FEATURE_FP16_VECTOR_ARITHMETIC */ -RelativeTolerance tolerance_f32(1e-4f); +#endif /* ARM_COMPUTE_ENABLE_FP16 */ +RelativeTolerance tolerance_f32(0.001f); RelativeTolerance tolerance_qasymm8(1); } // namespace @@ -77,26 +77,42 @@ template using NEMeanStdDevNormalizationLayerFixture = MeanStdDevNormalizationLayerValidationFixture; TEST_SUITE(Float) -#ifdef __ARM_FEATURE_FP16_VECTOR_ARITHMETIC +#ifdef ARM_COMPUTE_ENABLE_FP16 TEST_SUITE(FP16) FIXTURE_DATA_TEST_CASE(RunSmall, NEMeanStdDevNormalizationLayerFixture, framework::DatasetMode::PRECOMMIT, combine(combine(combine(datasets::Small2DShapes(), framework::dataset::make("DataType", DataType::F16)), framework::dataset::make("InPlace", { false, true })), framework::dataset::make("Epsilon", { 1e-3 }))) { - // Validate output - validate(Accessor(_target), _reference, tolerance_f16); + if(CPUInfo::get().has_fp16()) + { + // Validate output + validate(Accessor(_target), _reference, tolerance_f16); + } + else + { + ARM_COMPUTE_TEST_INFO("Device does not support fp16 vector operations. 
Test SKIPPED."); + framework::ARM_COMPUTE_PRINT_INFO(); + } } -FIXTURE_DATA_TEST_CASE(RunLarge, NEMeanStdDevNormalizationLayerFixture, framework::DatasetMode::NIGHTLY, combine(combine(combine(datasets::Large2DShapes(), +FIXTURE_DATA_TEST_CASE(RunLarge, NEMeanStdDevNormalizationLayerFixture, framework::DatasetMode::NIGHTLY, combine(combine(combine(datasets::Large2DMeanStdDevNormalizationShapes(), framework::dataset::make("DataType", DataType::F16)), framework::dataset::make("InPlace", { false, true })), framework::dataset::make("Epsilon", { 1e-8 }))) { - // Validate output - validate(Accessor(_target), _reference, tolerance_f16); + if(CPUInfo::get().has_fp16()) + { + // Validate output + validate(Accessor(_target), _reference, tolerance_f16); + } + else + { + ARM_COMPUTE_TEST_INFO("Device does not support fp16 vector operations. Test SKIPPED."); + framework::ARM_COMPUTE_PRINT_INFO(); + } } TEST_SUITE_END() // FP16 -#endif /* __ARM_FEATURE_FP16_VECTOR_ARITHMETIC */ +#endif /* ARM_COMPUTE_ENABLE_FP16 */ TEST_SUITE(FP32) FIXTURE_DATA_TEST_CASE(RunSmall, NEMeanStdDevNormalizationLayerFixture, framework::DatasetMode::PRECOMMIT, combine(combine(combine(datasets::Small2DShapes(), @@ -107,7 +123,7 @@ FIXTURE_DATA_TEST_CASE(RunSmall, NEMeanStdDevNormalizationLayerFixture, f // Validate output validate(Accessor(_target), _reference, tolerance_f32); } -FIXTURE_DATA_TEST_CASE(RunLarge, NEMeanStdDevNormalizationLayerFixture, framework::DatasetMode::NIGHTLY, combine(combine(combine(datasets::Large2DShapes(), +FIXTURE_DATA_TEST_CASE(RunLarge, NEMeanStdDevNormalizationLayerFixture, framework::DatasetMode::NIGHTLY, combine(combine(combine(datasets::Large2DMeanStdDevNormalizationShapes(), framework::dataset::make("DataType", DataType::F32)), framework::dataset::make("InPlace", { false, true })), framework::dataset::make("Epsilon", { 1e-8 }))) diff --git a/tests/validation/NEON/NormalizationLayer.cpp b/tests/validation/NEON/NormalizationLayer.cpp index 7260d1044e..793c2a1104 100644 
--- a/tests/validation/NEON/NormalizationLayer.cpp +++ b/tests/validation/NEON/NormalizationLayer.cpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2017-2021 Arm Limited. + * Copyright (c) 2017-2021, 2024 Arm Limited. * * SPDX-License-Identifier: MIT * @@ -44,9 +44,9 @@ namespace validation namespace { /** Tolerance for float operations */ -#ifdef __ARM_FEATURE_FP16_VECTOR_ARITHMETIC +#ifdef ARM_COMPUTE_ENABLE_FP16 constexpr AbsoluteTolerance tolerance_f16(0.1f); -#endif /* __ARM_FEATURE_FP16_VECTOR_ARITHMETIC */ +#endif /* ARM_COMPUTE_ENABLE_FP16 */ constexpr AbsoluteTolerance tolerance_f32(0.00001f); /** Input data set. */ @@ -95,17 +95,25 @@ template using NENormalizationLayerFixture = NormalizationValidationFixture; TEST_SUITE(Float) -#ifdef __ARM_FEATURE_FP16_VECTOR_ARITHMETIC +#ifdef ARM_COMPUTE_ENABLE_FP16 TEST_SUITE(FP16) FIXTURE_DATA_TEST_CASE(RunSmall, NENormalizationLayerFixture, framework::DatasetMode::ALL, combine(combine(NormalizationDataset, framework::dataset::make("DataType", DataType::F16)), framework::dataset::make("DataLayout", { DataLayout::NCHW, DataLayout::NHWC }))) { - // Validate output - validate(Accessor(_target), _reference, tolerance_f16); + if(CPUInfo::get().has_fp16()) + { + // Validate output + validate(Accessor(_target), _reference, tolerance_f16); + } + else + { + ARM_COMPUTE_TEST_INFO("Device does not support fp16 vector operations. 
Test SKIPPED."); + framework::ARM_COMPUTE_PRINT_INFO(); + } } TEST_SUITE_END() // FP16 -#endif /* __ARM_FEATURE_FP16_VECTOR_ARITHMETIC */ +#endif /* ARM_COMPUTE_ENABLE_FP16 */ TEST_SUITE(FP32) FIXTURE_DATA_TEST_CASE(RunSmall, NENormalizationLayerFixture, framework::DatasetMode::PRECOMMIT, combine(combine(combine(datasets::SmallShapes(), NormalizationDatasetFP32), diff --git a/tests/validation/NEON/PReluLayer.cpp b/tests/validation/NEON/PReluLayer.cpp index 69a05175ad..de07524a15 100644 --- a/tests/validation/NEON/PReluLayer.cpp +++ b/tests/validation/NEON/PReluLayer.cpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2019-2021 Arm Limited. + * Copyright (c) 2019-2021, 2024 Arm Limited. * * SPDX-License-Identifier: MIT * @@ -54,13 +54,13 @@ const auto PReluLayerQASYMM8SignedDataset = combine(combine(framework::dataset:: const auto PReluLayerFP32Dataset = combine(combine(framework::dataset::make("DataType", DataType::F32), framework::dataset::make("DataType", DataType::F32)), framework::dataset::make("DataType", DataType::F32)); -#ifdef __ARM_FEATURE_FP16_VECTOR_ARITHMETIC +#ifdef ARM_COMPUTE_ENABLE_FP16 RelativeTolerance tolerance_fp16(0.001f); const auto PReluLayerFP16Dataset = combine(combine(framework::dataset::make("DataType", DataType::F16), framework::dataset::make("DataType", DataType::F16)), framework::dataset::make("DataType", DataType::F16)); -#endif // __ARM_FEATURE_FP16_VECTOR_ARITHMETIC +#endif // ARM_COMPUTE_ENABLE_FP16 } // namespace @@ -157,21 +157,37 @@ TEST_SUITE_END() // QASYMM8_SIGNED TEST_SUITE_END() // Quantized TEST_SUITE(Float) -#ifdef __ARM_FEATURE_FP16_VECTOR_ARITHMETIC +#ifdef ARM_COMPUTE_ENABLE_FP16 TEST_SUITE(FP16) FIXTURE_DATA_TEST_CASE(RunSmall, NEPReluLayerFixture, framework::DatasetMode::ALL, combine(datasets::SmallShapes(), PReluLayerFP16Dataset)) { - // Validate output - validate(Accessor(_target), _reference, tolerance_fp16, 0.01); + if(CPUInfo::get().has_fp16()) + { + // Validate output + validate(Accessor(_target), _reference, 
tolerance_fp16, 0.01); + } + else + { + ARM_COMPUTE_TEST_INFO("Device does not support fp16 vector operations. Test SKIPPED."); + framework::ARM_COMPUTE_PRINT_INFO(); + } } FIXTURE_DATA_TEST_CASE(RunLarge, NEPReluLayerFixture, framework::DatasetMode::NIGHTLY, combine(datasets::LargeShapes(), PReluLayerFP16Dataset)) { - // Validate output - validate(Accessor(_target), _reference, tolerance_fp16, 0.01); + if(CPUInfo::get().has_fp16()) + { + // Validate output + validate(Accessor(_target), _reference, tolerance_fp16, 0.01); + } + else + { + ARM_COMPUTE_TEST_INFO("Device does not support fp16 vector operations. Test SKIPPED."); + framework::ARM_COMPUTE_PRINT_INFO(); + } } TEST_SUITE_END() // FP16 -#endif /* __ARM_FEATURE_FP16_VECTOR_ARITHMETIC */ +#endif /* ARM_COMPUTE_ENABLE_FP16 */ TEST_SUITE(FP32) FIXTURE_DATA_TEST_CASE(RunSmall, NEPReluLayerFixture, framework::DatasetMode::ALL, combine(datasets::SmallShapes(), PReluLayerFP32Dataset)) diff --git a/tests/validation/NEON/PadLayer.cpp b/tests/validation/NEON/PadLayer.cpp index 4947af3423..f175cdb1e9 100644 --- a/tests/validation/NEON/PadLayer.cpp +++ b/tests/validation/NEON/PadLayer.cpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2018-2021 Arm Limited. + * Copyright (c) 2018-2021, 2024 Arm Limited. 
* * SPDX-License-Identifier: MIT * @@ -144,26 +144,42 @@ FIXTURE_DATA_TEST_CASE(RunLarge, NEPaddingFixture, framework::DatasetMode } TEST_SUITE_END() // FP32 -#ifdef __ARM_FEATURE_FP16_VECTOR_ARITHMETIC +#ifdef ARM_COMPUTE_ENABLE_FP16 TEST_SUITE(FP16) FIXTURE_DATA_TEST_CASE(RunSmall, NEPaddingFixture, framework::DatasetMode::ALL, combine(combine(combine(datasets::Small3DShapes(), framework::dataset::make("DataType", { DataType::F16 })), PaddingSizesDataset), framework::dataset::make("PaddingMode", { PaddingMode::CONSTANT, PaddingMode::REFLECT }))) { - // Validate output - validate(Accessor(_target), _reference); + if(CPUInfo::get().has_fp16()) + { + // Validate output + validate(Accessor(_target), _reference); + } + else + { + ARM_COMPUTE_TEST_INFO("Device does not support fp16 vector operations. Test SKIPPED."); + framework::ARM_COMPUTE_PRINT_INFO(); + } } FIXTURE_DATA_TEST_CASE(RunLarge, NEPaddingFixture, framework::DatasetMode::NIGHTLY, combine(combine(combine(datasets::Large3DShapes(), framework::dataset::make("DataType", { DataType::F16 })), PaddingSizesDataset), framework::dataset::make("PaddingMode", { PaddingMode::CONSTANT, PaddingMode::REFLECT, PaddingMode::SYMMETRIC }))) { - // Validate output - validate(Accessor(_target), _reference); + if(CPUInfo::get().has_fp16()) + { + // Validate output + validate(Accessor(_target), _reference); + } + else + { + ARM_COMPUTE_TEST_INFO("Device does not support fp16 vector operations. Test SKIPPED."); + framework::ARM_COMPUTE_PRINT_INFO(); + } } TEST_SUITE_END() // FP16 -#endif /* __ARM_FEATURE_FP16_VECTOR_ARITHMETIC */ +#endif /* ARM_COMPUTE_ENABLE_FP16 */ TEST_SUITE_END() // Float TEST_SUITE(Quantized) diff --git a/tests/validation/NEON/Permute.cpp b/tests/validation/NEON/Permute.cpp index d897bbbe07..e9939105cd 100644 --- a/tests/validation/NEON/Permute.cpp +++ b/tests/validation/NEON/Permute.cpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2018-2020 Arm Limited. + * Copyright (c) 2018-2020, 2024 Arm Limited. 
* * SPDX-License-Identifier: MIT * @@ -174,6 +174,23 @@ FIXTURE_DATA_TEST_CASE(RunLarge, NEPermuteFixture, framework::DatasetM } TEST_SUITE_END() +#ifdef ARM_COMPUTE_ENABLE_FP16 +TEST_SUITE(F16) +FIXTURE_DATA_TEST_CASE(RunSmall, NEPermuteFixture, framework::DatasetMode::PRECOMMIT, + PermuteParametersSmall * framework::dataset::make("DataType", DataType::F16)) +{ + // Validate output + validate(Accessor(_target), _reference); +} +FIXTURE_DATA_TEST_CASE(RunLarge, NEPermuteFixture, framework::DatasetMode::NIGHTLY, + PermuteParametersLarge * framework::dataset::make("DataType", DataType::F16)) +{ + // Validate output + validate(Accessor(_target), _reference); +} +TEST_SUITE_END() +#endif /* ARM_COMPUTE_ENABLE_FP16 */ + TEST_SUITE_END() TEST_SUITE_END() } // namespace validation diff --git a/tests/validation/NEON/PixelWiseMultiplication.cpp b/tests/validation/NEON/PixelWiseMultiplication.cpp index 964d1c5deb..101064770c 100644 --- a/tests/validation/NEON/PixelWiseMultiplication.cpp +++ b/tests/validation/NEON/PixelWiseMultiplication.cpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2017-2021 Arm Limited. + * Copyright (c) 2017-2021, 2024 Arm Limited. * * SPDX-License-Identifier: MIT * @@ -101,7 +101,17 @@ const auto OutOfPlaceDataSet = framework::dataset::make("InPlace", { false }); framework::dataset::make("RoundingPolicy", RoundingPolicy::RP)), \ (INPLACE_DATASET))) \ { \ - VALIDATE \ + if((DataType::DT1 != DataType::F16 && \ + DataType::DT2 != DataType::F16 && \ + DataType::DT3 != DataType::F16) || CPUInfo::get().has_fp16()) \ + { \ + VALIDATE \ + } \ + else \ + { \ + ARM_COMPUTE_TEST_INFO("Device does not support fp16 vector operations. 
Test SKIPPED."); \ + framework::ARM_COMPUTE_PRINT_INFO(); \ + } \ } // *INDENT-ON* @@ -531,7 +541,7 @@ TEST_SUITE_END() // Broadcast TEST_SUITE_END() // S32toS32 -#ifdef __ARM_FEATURE_FP16_VECTOR_ARITHMETIC +#ifdef ARM_COMPUTE_ENABLE_FP16 TEST_SUITE(F16toF16) TEST_SUITE(Scale255) @@ -539,7 +549,7 @@ PIXEL_WISE_MULTIPLICATION_FIXTURE_DATA_TEST_CASE(RunSmall, ToF16Fixture>; constexpr AbsoluteTolerance tolerance_f32(0.001f); /**< Tolerance value for comparing reference's output against implementation's output for 32-bit floating-point type */ -#ifdef __ARM_FEATURE_FP16_VECTOR_ARITHMETIC +#ifdef ARM_COMPUTE_ENABLE_FP16 constexpr AbsoluteTolerance tolerance_f16(0.01f); /**< Tolerance value for comparing reference's output against implementation's output for 16-bit floating-point type */ -#endif /* __ARM_FEATURE_FP16_VECTOR_ARITHMETIC */ +#endif /* ARM_COMPUTE_ENABLE_FP16 */ constexpr AbsoluteTolerance tolerance_qasymm8(1); /**< Tolerance value for comparing reference's output against implementation's output for unsigned 8-bit asymmetric type */ constexpr AbsoluteTolerance tolerance_qasymm8_s(1); /**< Tolerance value for comparing reference's output against implementation's output for signed 8-bit asymmetric type */ @@ -239,14 +239,22 @@ FIXTURE_DATA_TEST_CASE(RunLarge, NEPoolingLayer3dFixture, framework::Data TEST_SUITE_END() // GlobalPooling TEST_SUITE_END() // FP32 -#ifdef __ARM_FEATURE_FP16_VECTOR_ARITHMETIC +#ifdef ARM_COMPUTE_ENABLE_FP16 TEST_SUITE(FP16) FIXTURE_DATA_TEST_CASE(RunSmall, NEPoolingLayer3dFixture, framework::DatasetMode::PRECOMMIT, combine(datasets::Small5x5Shapes(), combine(Pooling3dLayerDatasetFPSmall, framework::dataset::make("DataType", DataType::F16)))) { - // Validate output - validate(Accessor(_target), _reference, tolerance_f16); + if(CPUInfo::get().has_fp16()) + { + // Validate output + validate(Accessor(_target), _reference, tolerance_f16); + } + else + { + ARM_COMPUTE_TEST_INFO("Device does not support fp16 vector operations. 
Test SKIPPED."); + framework::ARM_COMPUTE_PRINT_INFO(); + } } @@ -254,8 +262,16 @@ FIXTURE_DATA_TEST_CASE(RunLarge, NEPoolingLayer3dFixture, framework::Datas framework::dataset::make("DataType", DataType::F16)))) { - // Validate output - validate(Accessor(_target), _reference, tolerance_f16); + if(CPUInfo::get().has_fp16()) + { + // Validate output + validate(Accessor(_target), _reference, tolerance_f16); + } + else + { + ARM_COMPUTE_TEST_INFO("Device does not support fp16 vector operations. Test SKIPPED."); + framework::ARM_COMPUTE_PRINT_INFO(); + } } TEST_SUITE(GlobalPooling) @@ -273,8 +289,16 @@ FIXTURE_DATA_TEST_CASE(RunSmall, NEPoolingLayer3dFixture, framework::Datas framework::dataset::make("ExcludePadding", {false, true})), framework::dataset::make("DataType", DataType::F16))) { - // Validate output - validate(Accessor(_target), _reference, tolerance_f16); + if(CPUInfo::get().has_fp16()) + { + // Validate output + validate(Accessor(_target), _reference, tolerance_f16); + } + else + { + ARM_COMPUTE_TEST_INFO("Device does not support fp16 vector operations. Test SKIPPED."); + framework::ARM_COMPUTE_PRINT_INFO(); + } } @@ -286,8 +310,16 @@ FIXTURE_DATA_TEST_CASE(RunSmallGlobal, NEPooling3dLayerGlobalFixture, fram framework::dataset::make("PoolingType", { PoolingType::AVG, PoolingType::L2, PoolingType::MAX })), framework::dataset::make("DataType", DataType::F16))) { - // Validate output - validate(Accessor(_target), _reference, tolerance_f16); + if(CPUInfo::get().has_fp16()) + { + // Validate output + validate(Accessor(_target), _reference, tolerance_f16); + } + else + { + ARM_COMPUTE_TEST_INFO("Device does not support fp16 vector operations. 
Test SKIPPED."); + framework::ARM_COMPUTE_PRINT_INFO(); + } } FIXTURE_DATA_TEST_CASE(RunLarge, NEPoolingLayer3dFixture, framework::DatasetMode::NIGHTLY, @@ -302,15 +334,23 @@ FIXTURE_DATA_TEST_CASE(RunLarge, NEPoolingLayer3dFixture, framework::Datas framework::dataset::make("ExcludePadding", false)), framework::dataset::make("DataType", DataType::F16))) { - // Validate output - validate(Accessor(_target), _reference, tolerance_f16); + if(CPUInfo::get().has_fp16()) + { + // Validate output + validate(Accessor(_target), _reference, tolerance_f16); + } + else + { + ARM_COMPUTE_TEST_INFO("Device does not support fp16 vector operations. Test SKIPPED."); + framework::ARM_COMPUTE_PRINT_INFO(); + } } // clang-format on // *INDENT-ON* TEST_SUITE_END() // GlobalPooling TEST_SUITE_END() // FP16 -#endif /* __ARM_FEATURE_FP16_VECTOR_ARITHMETIC */ +#endif /* ARM_COMPUTE_ENABLE_FP16 */ TEST_SUITE_END() // Float TEST_SUITE(Quantized) diff --git a/tests/validation/NEON/PoolingLayer.cpp b/tests/validation/NEON/PoolingLayer.cpp index 161fe627cc..f635a63bbe 100644 --- a/tests/validation/NEON/PoolingLayer.cpp +++ b/tests/validation/NEON/PoolingLayer.cpp @@ -58,9 +58,9 @@ const auto PoolingLayerDatasetQASYMM8Small = combine(combine(combine(framework:: framework::dataset::make("ExcludePadding", { true })); constexpr AbsoluteTolerance tolerance_f32(0.001f); /**< Tolerance value for comparing reference's output against implementation's output for float types */ -#ifdef __ARM_FEATURE_FP16_VECTOR_ARITHMETIC +#ifdef ARM_COMPUTE_ENABLE_FP16 constexpr AbsoluteTolerance tolerance_f16(0.01f); /**< Tolerance value for comparing reference's output against implementation's output for float types */ -#endif /* __ARM_FEATURE_FP16_VECTOR_ARITHMETIC */ +#endif /* ARM_COMPUTE_ENABLE_FP16 */ constexpr AbsoluteTolerance tolerance_qasymm8(1); /**< Tolerance value for comparing reference's output against implementation's output for unsigned 8-bit asymmetric type */ constexpr AbsoluteTolerance 
tolerance_qasymm8_s(1); /**< Tolerance value for comparing reference's output against implementation's output for signed 8-bit asymmetric type */ const auto pool_data_layout_dataset = framework::dataset::make("DataLayout", { DataLayout::NCHW, DataLayout::NHWC }); @@ -226,7 +226,7 @@ FIXTURE_DATA_TEST_CASE(PoolRegionCompletelyOutsideInput, NEPoolingLayerFixture, framework::DatasetMode::PRECOMMIT, combine(combine(combine(datasets::SmallNoneUnitShapes(), combine(PoolingLayerIndicesDatasetFPSmall, @@ -235,23 +235,47 @@ FIXTURE_DATA_TEST_CASE(RunIndices, NEPoolingLayerIndicesFixture, framework framework::dataset::make("DataLayout", { DataLayout::NCHW, DataLayout::NHWC })), framework::dataset::make("UseKernelIndices", { false }))) { - // Validate output - validate(Accessor(_target), _reference, tolerance_f16); - validate(Accessor(_target_indices), _ref_indices); + if(CPUInfo::get().has_fp16()) + { + // Validate output + validate(Accessor(_target), _reference, tolerance_f16); + validate(Accessor(_target_indices), _ref_indices); + } + else + { + ARM_COMPUTE_TEST_INFO("Device does not support fp16 vector operations. Test SKIPPED."); + framework::ARM_COMPUTE_PRINT_INFO(); + } } FIXTURE_DATA_TEST_CASE(RunSmall, NEPoolingLayerFixture, framework::DatasetMode::PRECOMMIT, combine(combine(datasets::SmallNoneUnitShapes(), combine(PoolingLayerDatasetFPSmall, framework::dataset::make("DataType", DataType::F16))), pool_data_layout_dataset)) { - // Validate output - validate(Accessor(_target), _reference, tolerance_f16); + if(CPUInfo::get().has_fp16()) + { + // Validate output + validate(Accessor(_target), _reference, tolerance_f16); + } + else + { + ARM_COMPUTE_TEST_INFO("Device does not support fp16 vector operations. 
Test SKIPPED."); + framework::ARM_COMPUTE_PRINT_INFO(); + } } FIXTURE_DATA_TEST_CASE(RunLarge, NEPoolingLayerFixture, framework::DatasetMode::NIGHTLY, combine(combine(datasets::LargeShapes(), combine(PoolingLayerDatasetFP, framework::dataset::make("DataType", DataType::F16))), pool_data_layout_dataset)) { - // Validate output - validate(Accessor(_target), _reference, tolerance_f16); + if(CPUInfo::get().has_fp16()) + { + // Validate output + validate(Accessor(_target), _reference, tolerance_f16); + } + else + { + ARM_COMPUTE_TEST_INFO("Device does not support fp16 vector operations. Test SKIPPED."); + framework::ARM_COMPUTE_PRINT_INFO(); + } } TEST_SUITE(CornerCases) FIXTURE_DATA_TEST_CASE(PoolRegionCompletelyOutsideInput, NEPoolingLayerFixture, framework::DatasetMode::PRECOMMIT, combine(combine(pool_outside_input_dataset, @@ -259,12 +283,20 @@ FIXTURE_DATA_TEST_CASE(PoolRegionCompletelyOutsideInput, NEPoolingLayerFixture, framework::DatasetMode::PRECOMMIT, combine(combine(combine(QuantizationSmallShapes, framework::dataset::make("DataType", DataType::F16)), framework::dataset::make("DataTypeOut", { DataType::QASYMM8 })), framework::dataset::make("QuantizationInfo", { QuantizationInfo(0.5f, 10) }))) { - // Validate output - validate(Accessor(_target), _reference, tolerance_u8); + if(CPUInfo::get().has_fp16()) + { + // Validate output + validate(Accessor(_target), _reference, tolerance_u8); + } + else + { + ARM_COMPUTE_TEST_INFO("Device does not support fp16 vector operations. 
Test SKIPPED."); + framework::ARM_COMPUTE_PRINT_INFO(); + } } FIXTURE_DATA_TEST_CASE(RunSmallQASYMM8Signed, NEQuantizationLayerQASYMM8SignedFixture, framework::DatasetMode::ALL, combine(combine(combine(QuantizationSmallShapes, framework::dataset::make("DataType", DataType::F16)), framework::dataset::make("DataTypeOut", { DataType::QASYMM8_SIGNED })), framework::dataset::make("QuantizationInfo", { QuantizationInfo(0.5f, 10) }))) { - // Validate output - validate(Accessor(_target), _reference, tolerance_s8); + if(CPUInfo::get().has_fp16()) + { + // Validate output + validate(Accessor(_target), _reference, tolerance_s8); + } + else + { + ARM_COMPUTE_TEST_INFO("Device does not support fp16 vector operations. Test SKIPPED."); + framework::ARM_COMPUTE_PRINT_INFO(); + } } FIXTURE_DATA_TEST_CASE(RunSmallQASYMM16, NEQuantizationLayerQASYMM16Fixture, framework::DatasetMode::PRECOMMIT, combine(combine(combine(QuantizationSmallShapes, framework::dataset::make("DataType", DataType::F16)), framework::dataset::make("DataTypeOut", { DataType::QASYMM16 })), framework::dataset::make("QuantizationInfo", { QuantizationInfo(0.5f, 10) }))) { - // Validate output - validate(Accessor(_target), _reference, tolerance_u16); + if(CPUInfo::get().has_fp16()) + { + // Validate output + validate(Accessor(_target), _reference, tolerance_u16); + } + else + { + ARM_COMPUTE_TEST_INFO("Device does not support fp16 vector operations. 
Test SKIPPED."); + framework::ARM_COMPUTE_PRINT_INFO(); + } } FIXTURE_DATA_TEST_CASE(RunLargeQASYMM8, NEQuantizationLayerQASYMM8Fixture, framework::DatasetMode::NIGHTLY, combine(combine(combine(QuantizationLargeShapes, framework::dataset::make("DataType", DataType::F16)), framework::dataset::make("DataTypeOut", { DataType::QASYMM8 })), framework::dataset::make("QuantizationInfo", { QuantizationInfo(0.5f, 10) }))) { - // Validate output - validate(Accessor(_target), _reference, tolerance_u8); + if(CPUInfo::get().has_fp16()) + { + // Validate output + validate(Accessor(_target), _reference, tolerance_u8); + } + else + { + ARM_COMPUTE_TEST_INFO("Device does not support fp16 vector operations. Test SKIPPED."); + framework::ARM_COMPUTE_PRINT_INFO(); + } } FIXTURE_DATA_TEST_CASE(RunLargeQASYMM16, NEQuantizationLayerQASYMM16Fixture, framework::DatasetMode::NIGHTLY, combine(combine(combine(QuantizationLargeShapes, framework::dataset::make("DataType", DataType::F16)), framework::dataset::make("DataTypeOut", { DataType::QASYMM16 })), framework::dataset::make("QuantizationInfo", { QuantizationInfo(0.5f, 10) }))) { - // Validate output - validate(Accessor(_target), _reference, tolerance_u16); + if(CPUInfo::get().has_fp16()) + { + // Validate output + validate(Accessor(_target), _reference, tolerance_u16); + } + else + { + ARM_COMPUTE_TEST_INFO("Device does not support fp16 vector operations. 
Test SKIPPED."); + framework::ARM_COMPUTE_PRINT_INFO(); + } } TEST_SUITE_END() // FP16 -#endif // __ARM_FEATURE_FP16_VECTOR_ARITHMETIC +#endif // ARM_COMPUTE_ENABLE_FP16 TEST_SUITE_END() // Float TEST_SUITE(Quantized) diff --git a/tests/validation/NEON/RNNLayer.cpp b/tests/validation/NEON/RNNLayer.cpp index 979aa0f2c5..d6e4b7ac0e 100644 --- a/tests/validation/NEON/RNNLayer.cpp +++ b/tests/validation/NEON/RNNLayer.cpp @@ -40,10 +40,10 @@ namespace validation namespace { RelativeTolerance tolerance_f32(0.001f); /**< Relative tolerance value for comparing reference's output against implementation's output for DataType:F32 */ -#ifdef __ARM_FEATURE_FP16_VECTOR_ARITHMETIC +#ifdef ARM_COMPUTE_ENABLE_FP16 RelativeTolerance tolerance_f16(half(0.1)); /**< Relative tolerance value for comparing reference's output against implementation's output for DataType:F16 */ constexpr float abs_tolerance_f16(0.02f); /**< Absolute tolerance value for comparing reference's output against implementation's output for DataType:F16 */ -#endif /* __ARM_FEATURE_FP16_VECTOR_ARITHMETIC */ +#endif /* ARM_COMPUTE_ENABLE_FP16 */ } // namespace TEST_SUITE(NEON) @@ -134,15 +134,23 @@ FIXTURE_DATA_TEST_CASE(RunSmall, NERNNLayerFixture, framework::DatasetMod } TEST_SUITE_END() // FP32 -#ifdef __ARM_FEATURE_FP16_VECTOR_ARITHMETIC +#ifdef ARM_COMPUTE_ENABLE_FP16 TEST_SUITE(FP16) FIXTURE_DATA_TEST_CASE(RunSmall, NERNNLayerFixture, framework::DatasetMode::ALL, combine(datasets::SmallRNNLayerDataset(), framework::dataset::make("DataType", DataType::F16))) { - // Validate output - validate(Accessor(_target), _reference, tolerance_f16, 0.02f, abs_tolerance_f16); + if(CPUInfo::get().has_fp16()) + { + // Validate output + validate(Accessor(_target), _reference, tolerance_f16, 0.02f, abs_tolerance_f16); + } + else + { + ARM_COMPUTE_TEST_INFO("Device does not support fp16 vector operations. 
Test SKIPPED."); + framework::ARM_COMPUTE_PRINT_INFO(); + } } TEST_SUITE_END() // FP16 -#endif /* __ARM_FEATURE_FP16_VECTOR_ARITHMETIC */ +#endif /* ARM_COMPUTE_ENABLE_FP16 */ TEST_SUITE_END() // RNNLayer TEST_SUITE_END() // Neon } // namespace validation diff --git a/tests/validation/NEON/ROIAlignLayer.cpp b/tests/validation/NEON/ROIAlignLayer.cpp index 98c92a0b20..1f3db04ffd 100644 --- a/tests/validation/NEON/ROIAlignLayer.cpp +++ b/tests/validation/NEON/ROIAlignLayer.cpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2019-2021 Arm Limited. + * Copyright (c) 2019-2021, 2024 Arm Limited. * * SPDX-License-Identifier: MIT * @@ -47,10 +47,10 @@ namespace RelativeTolerance relative_tolerance_f32(0.01f); AbsoluteTolerance absolute_tolerance_f32(0.001f); -#ifdef __ARM_FEATURE_FP16_VECTOR_ARITHMETIC +#ifdef ARM_COMPUTE_ENABLE_FP16 RelativeTolerance relative_tolerance_f16(0.01f); AbsoluteTolerance absolute_tolerance_f16(0.001f); -#endif // __ARM_FEATURE_FP16_VECTOR_ARITHMETIC +#endif // ARM_COMPUTE_ENABLE_FP16 constexpr AbsoluteTolerance tolerance_qasymm8(1); constexpr AbsoluteTolerance tolerance_qasymm8_s(1); @@ -115,17 +115,25 @@ FIXTURE_DATA_TEST_CASE(SmallROIAlignLayerFloat, NEROIAlignLayerFloatFixture, fra // Validate output validate(Accessor(_target), _reference, relative_tolerance_f32, .02f, absolute_tolerance_f32); } -#ifdef __ARM_FEATURE_FP16_VECTOR_ARITHMETIC +#ifdef ARM_COMPUTE_ENABLE_FP16 using NEROIAlignLayerHalfFixture = ROIAlignLayerFixture; FIXTURE_DATA_TEST_CASE(SmallROIAlignLayerHalf, NEROIAlignLayerHalfFixture, framework::DatasetMode::ALL, framework::dataset::combine(framework::dataset::combine(datasets::SmallROIDataset(), framework::dataset::make("DataType", { DataType::F16 })), framework::dataset::make("DataLayout", { DataLayout::NCHW, DataLayout::NHWC }))) { - // Validate output - validate(Accessor(_target), _reference, relative_tolerance_f16, .02f, absolute_tolerance_f16); + if(CPUInfo::get().has_fp16()) + { + // Validate output + validate(Accessor(_target), 
_reference, relative_tolerance_f16, .02f, absolute_tolerance_f16); + } + else + { + ARM_COMPUTE_TEST_INFO("Device does not support fp16 vector operations. Test SKIPPED."); + framework::ARM_COMPUTE_PRINT_INFO(); + } } -#endif // __ARM_FEATURE_FP16_VECTOR_ARITHMETIC +#endif // ARM_COMPUTE_ENABLE_FP16 TEST_SUITE_END() // Float diff --git a/tests/validation/NEON/Range.cpp b/tests/validation/NEON/Range.cpp index fda7b2c448..0df5e86186 100644 --- a/tests/validation/NEON/Range.cpp +++ b/tests/validation/NEON/Range.cpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2018-2021 Arm Limited. + * Copyright (c) 2018-2021, 2024 Arm Limited. * * SPDX-License-Identifier: MIT * @@ -144,7 +144,7 @@ FIXTURE_DATA_TEST_CASE(RunSmall, NERangeFixture, framework::DatasetMode TEST_SUITE_END() // S16 TEST_SUITE(Float) -#ifdef __ARM_FEATURE_FP16_VECTOR_ARITHMETIC +#ifdef ARM_COMPUTE_ENABLE_FP16 TEST_SUITE(FP16) FIXTURE_DATA_TEST_CASE(RunSmall, NERangeFixture, framework::DatasetMode::PRECOMMIT, combine(combine(combine( framework::dataset::make("DataType", DataType::F16), @@ -152,11 +152,19 @@ FIXTURE_DATA_TEST_CASE(RunSmall, NERangeFixture, framework::DatasetMode::P float_step_dataset), framework::dataset::make("QuantizationInfo", { QuantizationInfo() }))) { - // Validate output - validate(Accessor(_target), _reference, tolerance, 0.f, abs_tolerance); + if(CPUInfo::get().has_fp16()) + { + // Validate output + validate(Accessor(_target), _reference, tolerance, 0.f, abs_tolerance); + } + else + { + ARM_COMPUTE_TEST_INFO("Device does not support fp16 vector operations. 
Test SKIPPED."); + framework::ARM_COMPUTE_PRINT_INFO(); + } } TEST_SUITE_END() // FP16 -#endif // __ARM_FEATURE_FP16_VECTOR_ARITHMETIC +#endif // ARM_COMPUTE_ENABLE_FP16 TEST_SUITE(FP32) FIXTURE_DATA_TEST_CASE(RunSmall, NERangeFixture, framework::DatasetMode::PRECOMMIT, combine(combine(combine( diff --git a/tests/validation/NEON/ReduceMean.cpp b/tests/validation/NEON/ReduceMean.cpp index 8ca0bb53a7..e5692693bd 100644 --- a/tests/validation/NEON/ReduceMean.cpp +++ b/tests/validation/NEON/ReduceMean.cpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2018-2021, 2023 Arm Limited. + * Copyright (c) 2018-2021, 2023-2024 Arm Limited. * * SPDX-License-Identifier: MIT * @@ -43,9 +43,9 @@ namespace validation namespace { constexpr AbsoluteTolerance tolerance_f32(0.001f); /**< Tolerance value for comparing reference's output against implementation's output for 32-bit floating-point type */ -#ifdef __ARM_FEATURE_FP16_VECTOR_ARITHMETIC +#ifdef ARM_COMPUTE_ENABLE_FP16 constexpr AbsoluteTolerance tolerance_f16(0.03f); /**< Tolerance value for comparing reference's output against implementation's output for 16-bit floating-point type */ -#endif // __ARM_FEATURE_FP16_VECTOR_ARITHMETIC +#endif // ARM_COMPUTE_ENABLE_FP16 #ifdef __aarch64__ constexpr AbsoluteTolerance tolerance_u8(1); /**< Tolerance value for comparing reference's output against implementation's output for unsigned 8-bit asymmetric quantized type */ constexpr AbsoluteTolerance tolerance_s8(1); /**< Tolerance value for comparing reference's output against implementation's output for signed 8-bit asymmetric quantized type */ @@ -93,15 +93,23 @@ using NEReduceMeanFixture = ReduceMeanFixture TEST_SUITE(Float) -#ifdef __ARM_FEATURE_FP16_VECTOR_ARITHMETIC +#ifdef ARM_COMPUTE_ENABLE_FP16 TEST_SUITE(FP16) FIXTURE_DATA_TEST_CASE(RunSmall, NEReduceMeanFixture, framework::DatasetMode::PRECOMMIT, combine(combine(datasets::Small4DShapes(), framework::dataset::make("DataType", DataType::F16)), concat(axis_keep, axis_drop))) { - // Validate 
output - validate(Accessor(_target), _reference, tolerance_f16); + if(CPUInfo::get().has_fp16()) + { + // Validate output + validate(Accessor(_target), _reference, tolerance_f16); + } + else + { + ARM_COMPUTE_TEST_INFO("Device does not support fp16 vector operations. Test SKIPPED."); + framework::ARM_COMPUTE_PRINT_INFO(); + } } FIXTURE_DATA_TEST_CASE(RunLarge, @@ -109,11 +117,19 @@ FIXTURE_DATA_TEST_CASE(RunLarge, framework::DatasetMode::NIGHTLY, combine(combine(datasets::Large4DShapes(), framework::dataset::make("DataType", DataType::F16)), concat(axis_keep, axis_drop))) { - // Validate output - validate(Accessor(_target), _reference, tolerance_f16); + if(CPUInfo::get().has_fp16()) + { + // Validate output + validate(Accessor(_target), _reference, tolerance_f16); + } + else + { + ARM_COMPUTE_TEST_INFO("Device does not support fp16 vector operations. Test SKIPPED."); + framework::ARM_COMPUTE_PRINT_INFO(); + } } TEST_SUITE_END() // FP16 -#endif // __ARM_FEATURE_FP16_VECTOR_ARITHMETIC +#endif // ARM_COMPUTE_ENABLE_FP16 TEST_SUITE(FP32) FIXTURE_DATA_TEST_CASE(RunSmall, NEReduceMeanFixture, diff --git a/tests/validation/NEON/ReductionOperation.cpp b/tests/validation/NEON/ReductionOperation.cpp index 48c3a1a788..727e880d28 100644 --- a/tests/validation/NEON/ReductionOperation.cpp +++ b/tests/validation/NEON/ReductionOperation.cpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2017-2021 Arm Limited. + * Copyright (c) 2017-2021, 2024 Arm Limited. 
* * SPDX-License-Identifier: MIT * @@ -45,10 +45,10 @@ namespace /** Tolerance for float operations */ AbsoluteTolerance tolerance_f32(0.0001f); RelativeTolerance rel_tolerance_f32(0.0001f); -#ifdef __ARM_FEATURE_FP16_VECTOR_ARITHMETIC +#ifdef ARM_COMPUTE_ENABLE_FP16 AbsoluteTolerance tolerance_f16(0.2f); RelativeTolerance rel_tolerance_f16(0.1f); -#endif // __ARM_FEATURE_FP16_VECTOR_ARITHMETIC +#endif // ARM_COMPUTE_ENABLE_FP16 /** Tolerance for quantized operations */ RelativeTolerance tolerance_quantized(1.f); @@ -149,22 +149,38 @@ FIXTURE_DATA_TEST_CASE(RunLarge, NEReductionOperationFixture, framework:: } TEST_SUITE_END() // FP32 -#ifdef __ARM_FEATURE_FP16_VECTOR_ARITHMETIC +#ifdef ARM_COMPUTE_ENABLE_FP16 TEST_SUITE(FP16) FIXTURE_DATA_TEST_CASE(RunSmall, NEReductionOperationFixture, framework::DatasetMode::PRECOMMIT, combine(combine(combine(combine(datasets::Small4DShapes(), framework::dataset::make("DataType", DataType::F16)), Axises), ReductionOperations), KeepDims)) { - // Validate output - validate(Accessor(_target), _reference, tolerance_f16); + if(CPUInfo::get().has_fp16()) + { + // Validate output + validate(Accessor(_target), _reference, tolerance_f16); + } + else + { + ARM_COMPUTE_TEST_INFO("Device does not support fp16 vector operations. Test SKIPPED."); + framework::ARM_COMPUTE_PRINT_INFO(); + } } FIXTURE_DATA_TEST_CASE(RunLarge, NEReductionOperationFixture, framework::DatasetMode::NIGHTLY, combine(combine(combine(combine(datasets::Large4DShapes(), framework::dataset::make("DataType", DataType::F16)), Axises), ReductionOperations), KeepDims)) { - // Validate output - validate(Accessor(_target), _reference, rel_tolerance_f16, 0, tolerance_f16); + if(CPUInfo::get().has_fp16()) + { + // Validate output + validate(Accessor(_target), _reference, rel_tolerance_f16, 0, tolerance_f16); + } + else + { + ARM_COMPUTE_TEST_INFO("Device does not support fp16 vector operations. 
Test SKIPPED."); + framework::ARM_COMPUTE_PRINT_INFO(); + } } TEST_SUITE_END() // FP16 -#endif // __ARM_FEATURE_FP16_VECTOR_ARITHMETIC +#endif // ARM_COMPUTE_ENABLE_FP16 template using NEReductionOperationQuantizedFixture = ReductionOperationQuantizedFixture; diff --git a/tests/validation/NEON/Reverse.cpp b/tests/validation/NEON/Reverse.cpp index 7b5337f14b..7d99bd614d 100644 --- a/tests/validation/NEON/Reverse.cpp +++ b/tests/validation/NEON/Reverse.cpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2018-2021, 2023 Arm Limited. + * Copyright (c) 2018-2021, 2023-2024 Arm Limited. * * SPDX-License-Identifier: MIT * @@ -94,7 +94,7 @@ using NEReverseFixture = ReverseValidationFixture, @@ -105,8 +105,16 @@ FIXTURE_DATA_TEST_CASE(RunSmall, make("use_negative_axis", { true, false }), make("use_inverted_axis", { true, false }))) { - // Validate output - validate(Accessor(_target), _reference); + if(CPUInfo::get().has_fp16()) + { + // Validate output + validate(Accessor(_target), _reference); + } + else + { + ARM_COMPUTE_TEST_INFO("Device does not support fp16 vector operations. Test SKIPPED."); + framework::ARM_COMPUTE_PRINT_INFO(); + } } FIXTURE_DATA_TEST_CASE(RunLarge, @@ -118,11 +126,19 @@ FIXTURE_DATA_TEST_CASE(RunLarge, make("use_negative_axis", { true, false }), make("use_inverted_axis", { true, false }))) { - // Validate output - validate(Accessor(_target), _reference); + if(CPUInfo::get().has_fp16()) + { + // Validate output + validate(Accessor(_target), _reference); + } + else + { + ARM_COMPUTE_TEST_INFO("Device does not support fp16 vector operations. 
Test SKIPPED."); + framework::ARM_COMPUTE_PRINT_INFO(); + } } TEST_SUITE_END() // F16 -#endif /* __ARM_FEATURE_FP16_VECTOR_ARITHMETIC */ +#endif /* ARM_COMPUTE_ENABLE_FP16 */ TEST_SUITE(FP32) FIXTURE_DATA_TEST_CASE(RunSmall, diff --git a/tests/validation/NEON/Scale.cpp b/tests/validation/NEON/Scale.cpp index f1209a21ac..55de2d6281 100644 --- a/tests/validation/NEON/Scale.cpp +++ b/tests/validation/NEON/Scale.cpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2017-2023 Arm Limited. + * Copyright (c) 2017-2024 Arm Limited. * * SPDX-License-Identifier: MIT * @@ -79,10 +79,10 @@ constexpr AbsoluteTolerance tolerance_u8(1); constexpr AbsoluteTolerance tolerance_s8(1); constexpr AbsoluteTolerance tolerance_s16(1); RelativeTolerance tolerance_f32(0.05); -#ifdef __ARM_FEATURE_FP16_VECTOR_ARITHMETIC +#ifdef ARM_COMPUTE_ENABLE_FP16 constexpr float abs_tolerance_f16(0.01f); RelativeTolerance tolerance_f16(half(0.1)); -#endif /* __ARM_FEATURE_FP16_VECTOR_ARITHMETIC */ +#endif /* ARM_COMPUTE_ENABLE_FP16 */ constexpr float tolerance_num_s16 = 0.01f; constexpr float tolerance_num_f32 = 0.01f; @@ -153,9 +153,9 @@ TEST_CASE(SupportDataType, framework::DatasetMode::ALL) { DataType::U64, false }, { DataType::S64, false }, { DataType::BFLOAT16, false }, -#ifdef __ARM_FEATURE_FP16_VECTOR_ARITHMETIC +#ifdef ARM_COMPUTE_ENABLE_FP16 { DataType::F16, true }, -#endif // __ARM_FEATURE_FP16_VECTOR_ARITHMETIC +#endif // ARM_COMPUTE_ENABLE_FP16 { DataType::F32, true }, { DataType::F64, false }, { DataType::SIZET, false }, @@ -381,57 +381,97 @@ FIXTURE_DATA_TEST_CASE(RunMediumAlignCornersNHWC, NEScaleFixture, framewo validate(Accessor(_target), _reference, valid_region, tolerance_f32, tolerance_num_f32); } TEST_SUITE_END() // FP32 -#ifdef __ARM_FEATURE_FP16_VECTOR_ARITHMETIC +#ifdef ARM_COMPUTE_ENABLE_FP16 TEST_SUITE(FP16) const auto f16_shape = combine((SCALE_SHAPE_DATASET(num_elements_per_vector())), framework::dataset::make("DataType", DataType::F16)); const auto f16_shape_nhwc = 
combine(datasets::Small3DShapes(), framework::dataset::make("DataType", DataType::F16)); FIXTURE_DATA_TEST_CASE(RunSmall, NEScaleFixture, framework::DatasetMode::ALL, ASSEMBLE_DATASET(f16_shape, ScaleSamplingPolicySet)) { - //Create valid region - TensorInfo src_info(_shape, 1, _data_type); - const ValidRegion valid_region = calculate_valid_region_scale(src_info, _reference.shape(), _policy, _sampling_policy, (_border_mode == BorderMode::UNDEFINED)); + if(CPUInfo::get().has_fp16()) + { + //Create valid region + TensorInfo src_info(_shape, 1, _data_type); + const ValidRegion valid_region = calculate_valid_region_scale(src_info, _reference.shape(), _policy, _sampling_policy, (_border_mode == BorderMode::UNDEFINED)); - // Validate output - validate(Accessor(_target), _reference, valid_region, tolerance_f16, 0.0f, abs_tolerance_f16); + // Validate output + validate(Accessor(_target), _reference, valid_region, tolerance_f16, 0.0f, abs_tolerance_f16); + } + else + { + ARM_COMPUTE_TEST_INFO("Device does not support fp16 vector operations. 
Test SKIPPED."); + framework::ARM_COMPUTE_PRINT_INFO(); + } } FIXTURE_DATA_TEST_CASE(RunSmallAlignCorners, NEScaleFixture, framework::DatasetMode::ALL, ASSEMBLE_DATASET(f16_shape, ScaleAlignCornersSamplingPolicySet)) { - //Create valid region - TensorInfo src_info(_shape, 1, _data_type); - const ValidRegion valid_region = calculate_valid_region_scale(src_info, _reference.shape(), _policy, _sampling_policy, (_border_mode == BorderMode::UNDEFINED)); + if(CPUInfo::get().has_fp16()) + { + //Create valid region + TensorInfo src_info(_shape, 1, _data_type); + const ValidRegion valid_region = calculate_valid_region_scale(src_info, _reference.shape(), _policy, _sampling_policy, (_border_mode == BorderMode::UNDEFINED)); - // Validate output - validate(Accessor(_target), _reference, valid_region, tolerance_f16, 0.0f, abs_tolerance_f16); + // Validate output + validate(Accessor(_target), _reference, valid_region, tolerance_f16, 0.0f, abs_tolerance_f16); + } + else + { + ARM_COMPUTE_TEST_INFO("Device does not support fp16 vector operations. 
Test SKIPPED."); + framework::ARM_COMPUTE_PRINT_INFO(); + } } FIXTURE_DATA_TEST_CASE(RunMediumNHWC, NEScaleFixture, framework::DatasetMode::ALL, ASSEMBLE_NHWC_DATASET(f16_shape_nhwc, ScaleSamplingPolicySet)) { - //Create valid region - TensorInfo src_info(_shape, 1, _data_type); - ValidRegion valid_region = calculate_valid_region_scale(src_info, _reference.shape(), _policy, _sampling_policy, (_border_mode == BorderMode::UNDEFINED)); + if(CPUInfo::get().has_fp16()) + { + //Create valid region + TensorInfo src_info(_shape, 1, _data_type); + ValidRegion valid_region = calculate_valid_region_scale(src_info, _reference.shape(), _policy, _sampling_policy, (_border_mode == BorderMode::UNDEFINED)); - // Validate output - validate(Accessor(_target), _reference, valid_region, tolerance_f16, 0.0f, abs_tolerance_f16); + // Validate output + validate(Accessor(_target), _reference, valid_region, tolerance_f16, 0.0f, abs_tolerance_f16); + } + else + { + ARM_COMPUTE_TEST_INFO("Device does not support fp16 vector operations. 
Test SKIPPED."); + framework::ARM_COMPUTE_PRINT_INFO(); + } } FIXTURE_DATA_TEST_CASE(RunMediumMixedDataLayoutNHWC, NEScaleMixedDataLayoutFixture, framework::DatasetMode::PRECOMMIT, ASSEMBLE_NHWC_DATASET(f16_shape_nhwc, ScaleSamplingPolicySet)) { - //Create valid region - TensorInfo src_info(_shape, 1, _data_type); - ValidRegion valid_region = calculate_valid_region_scale(src_info, _reference.shape(), _policy, _sampling_policy, (_border_mode == BorderMode::UNDEFINED)); + if(CPUInfo::get().has_fp16()) + { + //Create valid region + TensorInfo src_info(_shape, 1, _data_type); + ValidRegion valid_region = calculate_valid_region_scale(src_info, _reference.shape(), _policy, _sampling_policy, (_border_mode == BorderMode::UNDEFINED)); - // Validate output - validate(Accessor(_target), _reference, valid_region, tolerance_f16, 0.0f, abs_tolerance_f16); + // Validate output + validate(Accessor(_target), _reference, valid_region, tolerance_f16, 0.0f, abs_tolerance_f16); + } + else + { + ARM_COMPUTE_TEST_INFO("Device does not support fp16 vector operations. 
Test SKIPPED."); + framework::ARM_COMPUTE_PRINT_INFO(); + } } FIXTURE_DATA_TEST_CASE(RunMediumAlignCornersNHWC, NEScaleFixture, framework::DatasetMode::ALL, ASSEMBLE_NHWC_DATASET(f16_shape_nhwc, ScaleAlignCornersSamplingPolicySet)) { - //Create valid region - TensorInfo src_info(_shape, 1, _data_type); - ValidRegion valid_region = calculate_valid_region_scale(src_info, _reference.shape(), _policy, _sampling_policy, (_border_mode == BorderMode::UNDEFINED)); + if(CPUInfo::get().has_fp16()) + { + //Create valid region + TensorInfo src_info(_shape, 1, _data_type); + ValidRegion valid_region = calculate_valid_region_scale(src_info, _reference.shape(), _policy, _sampling_policy, (_border_mode == BorderMode::UNDEFINED)); - // Validate output - validate(Accessor(_target), _reference, valid_region, tolerance_f16, 0.0f, abs_tolerance_f16); + // Validate output + validate(Accessor(_target), _reference, valid_region, tolerance_f16, 0.0f, abs_tolerance_f16); + } + else + { + ARM_COMPUTE_TEST_INFO("Device does not support fp16 vector operations. Test SKIPPED."); + framework::ARM_COMPUTE_PRINT_INFO(); + } } TEST_SUITE_END() // FP16 -#endif /* __ARM_FEATURE_FP16_VECTOR_ARITHMETIC */ +#endif /* ARM_COMPUTE_ENABLE_FP16 */ TEST_SUITE_END() // Float TEST_SUITE(Integer) diff --git a/tests/validation/NEON/Select.cpp b/tests/validation/NEON/Select.cpp index 40744581b0..25d510aa64 100644 --- a/tests/validation/NEON/Select.cpp +++ b/tests/validation/NEON/Select.cpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2018-2021 Arm Limited. + * Copyright (c) 2018-2021, 2024 Arm Limited. 
* * SPDX-License-Identifier: MIT * @@ -96,15 +96,22 @@ using NESelectFixture = SelectValidationFixture; TEST_SUITE(Float) -#ifdef __ARM_FEATURE_FP16_VECTOR_ARITHMETIC +#ifdef ARM_COMPUTE_ENABLE_FP16 TEST_SUITE(F16) FIXTURE_DATA_TEST_CASE(RunSmall, NESelectFixture, framework::DatasetMode::PRECOMMIT, combine(run_small_dataset, framework::dataset::make("DataType", DataType::F16))) { - // Validate output - validate(Accessor(_target), _reference); + if(CPUInfo::get().has_fp16()) + { + validate(Accessor(_target), _reference); + } + else + { + ARM_COMPUTE_TEST_INFO("Device does not support fp16 vector operations. Test SKIPPED."); + framework::ARM_COMPUTE_PRINT_INFO(); + } } FIXTURE_DATA_TEST_CASE(RunLarge, @@ -112,11 +119,18 @@ FIXTURE_DATA_TEST_CASE(RunLarge, framework::DatasetMode::NIGHTLY, combine(run_large_dataset, framework::dataset::make("DataType", DataType::F16))) { - // Validate output - validate(Accessor(_target), _reference); + if(CPUInfo::get().has_fp16()) + { + validate(Accessor(_target), _reference); + } + else + { + ARM_COMPUTE_TEST_INFO("Device does not support fp16 vector operations. Test SKIPPED."); + framework::ARM_COMPUTE_PRINT_INFO(); + } } TEST_SUITE_END() // F16 -#endif /* __ARM_FEATURE_FP16_VECTOR_ARITHMETIC */ +#endif /* ARM_COMPUTE_ENABLE_FP16 */ TEST_SUITE(FP32) FIXTURE_DATA_TEST_CASE(RunSmall, diff --git a/tests/validation/NEON/Slice.cpp b/tests/validation/NEON/Slice.cpp index d5549c8cdb..2ec6d09134 100644 --- a/tests/validation/NEON/Slice.cpp +++ b/tests/validation/NEON/Slice.cpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2018-2021 Arm Limited. + * Copyright (c) 2018-2021, 2024 Arm Limited. 
* * SPDX-License-Identifier: MIT * @@ -67,15 +67,22 @@ template using NESliceFixture = SliceFixture; TEST_SUITE(Float) -#ifdef __ARM_FEATURE_FP16_VECTOR_ARITHMETIC +#ifdef ARM_COMPUTE_ENABLE_FP16 TEST_SUITE(FP16) FIXTURE_DATA_TEST_CASE(RunSmall, NESliceFixture, framework::DatasetMode::PRECOMMIT, combine(datasets::SmallSliceDataset(), framework::dataset::make("DataType", DataType::F16))) { - // Validate output - validate(Accessor(_target), _reference); + if(CPUInfo::get().has_fp16()) + { + validate(Accessor(_target), _reference); + } + else + { + ARM_COMPUTE_TEST_INFO("Device does not support fp16 vector operations. Test SKIPPED."); + framework::ARM_COMPUTE_PRINT_INFO(); + } } FIXTURE_DATA_TEST_CASE(RunLarge, @@ -83,11 +90,18 @@ FIXTURE_DATA_TEST_CASE(RunLarge, framework::DatasetMode::NIGHTLY, combine(datasets::LargeSliceDataset(), framework::dataset::make("DataType", DataType::F16))) { - // Validate output - validate(Accessor(_target), _reference); + if(CPUInfo::get().has_fp16()) + { + validate(Accessor(_target), _reference); + } + else + { + ARM_COMPUTE_TEST_INFO("Device does not support fp16 vector operations. 
Test SKIPPED."); + framework::ARM_COMPUTE_PRINT_INFO(); + } } TEST_SUITE_END() // FP16 -#endif /* __ARM_FEATURE_FP16_VECTOR_ARITHMETIC */ +#endif /* ARM_COMPUTE_ENABLE_FP16 */ TEST_SUITE(FP32) FIXTURE_DATA_TEST_CASE(RunSmall, diff --git a/tests/validation/NEON/SoftmaxLayer.cpp b/tests/validation/NEON/SoftmaxLayer.cpp index 94d0866c38..e428d7958b 100644 --- a/tests/validation/NEON/SoftmaxLayer.cpp +++ b/tests/validation/NEON/SoftmaxLayer.cpp @@ -54,9 +54,9 @@ constexpr AbsoluteTolerance tolerance_qasymm8_signed(1); /** CNN data types */ const auto CNNDataTypes = make("DataType", { -#ifdef __ARM_FEATURE_FP16_VECTOR_ARITHMETIC +#ifdef ARM_COMPUTE_ENABLE_FP16 DataType::F16, -#endif /* __ARM_FEATURE_FP16_VECTOR_ARITHMETIC */ +#endif /* ARM_COMPUTE_ENABLE_FP16 */ DataType::F32, }); } // namespace @@ -157,7 +157,7 @@ DATA_TEST_CASE(KernelSelection, framework::DatasetMode::ALL, } TEST_SUITE(Float) -#ifdef __ARM_FEATURE_FP16_VECTOR_ARITHMETIC +#ifdef ARM_COMPUTE_ENABLE_FP16 TEST_SUITE(FP16) FIXTURE_DATA_TEST_CASE(RunSmall2D, NESoftmaxLayerFixture, framework::DatasetMode::PRECOMMIT, combine( @@ -166,8 +166,15 @@ FIXTURE_DATA_TEST_CASE(RunSmall2D, NESoftmaxLayerFixture, framework::Datas make("Beta", { 1.0f, 2.0f }), make("Axis", { 0, -1 }))) { - // Validate output - validate(Accessor(_target), _reference, tolerance_f16); + if(CPUInfo::get().has_fp16()) + { + validate(Accessor(_target), _reference, tolerance_f16); + } + else + { + ARM_COMPUTE_TEST_INFO("Device does not support fp16 vector operations. 
Test SKIPPED."); + framework::ARM_COMPUTE_PRINT_INFO(); + } } FIXTURE_DATA_TEST_CASE(RunSmall, NESoftmaxLayerFixture, framework::DatasetMode::PRECOMMIT, combine( @@ -176,8 +183,16 @@ FIXTURE_DATA_TEST_CASE(RunSmall, NESoftmaxLayerFixture, framework::Dataset make("Beta", { 1.0f, 2.0f }), make("Axis", { 0, 1 }))) { - // Validate output - validate(Accessor(_target), _reference, tolerance_f16); + if(CPUInfo::get().has_fp16()) + { + // Validate output + validate(Accessor(_target), _reference, tolerance_f16); + } + else + { + ARM_COMPUTE_TEST_INFO("Device does not support fp16 vector operations. Test SKIPPED."); + framework::ARM_COMPUTE_PRINT_INFO(); + } } FIXTURE_DATA_TEST_CASE(RunSmall4D, NESoftmaxLayerFixture, framework::DatasetMode::PRECOMMIT, combine( @@ -186,8 +201,16 @@ FIXTURE_DATA_TEST_CASE(RunSmall4D, NESoftmaxLayerFixture, framework::Datas make("Beta", { 1.0f }), make("Axis", { 0, 2, -1 }))) { - // Validate output - validate(Accessor(_target), _reference, tolerance_f16); + if(CPUInfo::get().has_fp16()) + { + // Validate output + validate(Accessor(_target), _reference, tolerance_f16); + } + else + { + ARM_COMPUTE_TEST_INFO("Device does not support fp16 vector operations. Test SKIPPED."); + framework::ARM_COMPUTE_PRINT_INFO(); + } } FIXTURE_DATA_TEST_CASE(RunLarge, NESoftmaxLayerFixture, framework::DatasetMode::NIGHTLY, combine( @@ -196,11 +219,19 @@ FIXTURE_DATA_TEST_CASE(RunLarge, NESoftmaxLayerFixture, framework::Dataset make("Beta", { 1.0f, 2.0f }), make("Axis", { 0 }))) { - // Validate output - validate(Accessor(_target), _reference, tolerance_f16); + if(CPUInfo::get().has_fp16()) + { + // Validate output + validate(Accessor(_target), _reference, tolerance_f16); + } + else + { + ARM_COMPUTE_TEST_INFO("Device does not support fp16 vector operations. 
Test SKIPPED."); + framework::ARM_COMPUTE_PRINT_INFO(); + } } TEST_SUITE_END() //FP16 -#endif /* __ARM_FEATURE_FP16_VECTOR_ARITHMETIC */ +#endif /* ARM_COMPUTE_ENABLE_FP16 */ TEST_SUITE(FP32) FIXTURE_DATA_TEST_CASE(RunSmall2D, NESoftmaxLayerFixture, framework::DatasetMode::PRECOMMIT, diff --git a/tests/validation/NEON/Split.cpp b/tests/validation/NEON/Split.cpp index 72df2ad663..d7aa2e532c 100644 --- a/tests/validation/NEON/Split.cpp +++ b/tests/validation/NEON/Split.cpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2018-2021 Arm Limited. + * Copyright (c) 2018-2021, 2024 Arm Limited. * * SPDX-License-Identifier: MIT * @@ -98,17 +98,25 @@ template using NESplitShapesFixture = SplitShapesFixture; TEST_SUITE(Float) -#ifdef __ARM_FEATURE_FP16_VECTOR_ARITHMETIC +#ifdef ARM_COMPUTE_ENABLE_FP16 TEST_SUITE(FP16) FIXTURE_DATA_TEST_CASE(RunSmall, NESplitFixture, framework::DatasetMode::PRECOMMIT, combine(datasets::SmallSplitDataset(), framework::dataset::make("DataType", DataType::F16))) { - // Validate outputs - for(unsigned int i = 0; i < _target.size(); ++i) + if(CPUInfo::get().has_fp16()) { - validate(Accessor(_target[i]), _reference[i]); + // Validate outputs + for(unsigned int i = 0; i < _target.size(); ++i) + { + validate(Accessor(_target[i]), _reference[i]); + } + } + else + { + ARM_COMPUTE_TEST_INFO("Device does not support fp16 vector operations. Test SKIPPED."); + framework::ARM_COMPUTE_PRINT_INFO(); } } @@ -117,14 +125,22 @@ FIXTURE_DATA_TEST_CASE(RunLarge, framework::DatasetMode::NIGHTLY, combine(datasets::LargeSplitDataset(), framework::dataset::make("DataType", DataType::F16))) { - // Validate outputs - for(unsigned int i = 0; i < _target.size(); ++i) + if(CPUInfo::get().has_fp16()) { - validate(Accessor(_target[i]), _reference[i]); + // Validate outputs + for(unsigned int i = 0; i < _target.size(); ++i) + { + validate(Accessor(_target[i]), _reference[i]); + } + } + else + { + ARM_COMPUTE_TEST_INFO("Device does not support fp16 vector operations. 
Test SKIPPED."); + framework::ARM_COMPUTE_PRINT_INFO(); } } TEST_SUITE_END() // FP16 -#endif /* __ARM_FEATURE_FP16_VECTOR_ARITHMETIC */ +#endif /* ARM_COMPUTE_ENABLE_FP16 */ TEST_SUITE(FP32) FIXTURE_DATA_TEST_CASE(RunSmall, diff --git a/tests/validation/NEON/StridedSlice.cpp b/tests/validation/NEON/StridedSlice.cpp index a1b3cef801..7c76800d1f 100644 --- a/tests/validation/NEON/StridedSlice.cpp +++ b/tests/validation/NEON/StridedSlice.cpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2018-2021 Arm Limited. + * Copyright (c) 2018-2021, 2024 Arm Limited. * * SPDX-License-Identifier: MIT * @@ -69,15 +69,22 @@ template using NEStridedSliceFixture = StridedSliceFixture; TEST_SUITE(Float) -#ifdef __ARM_FEATURE_FP16_VECTOR_ARITHMETIC +#ifdef ARM_COMPUTE_ENABLE_FP16 TEST_SUITE(FP16) FIXTURE_DATA_TEST_CASE(RunSmall, NEStridedSliceFixture, framework::DatasetMode::PRECOMMIT, combine(datasets::SmallStridedSliceDataset(), framework::dataset::make("DataType", DataType::F16))) { - // Validate output - validate(Accessor(_target), _reference); + if(CPUInfo::get().has_fp16()) + { + validate(Accessor(_target), _reference); + } + else + { + ARM_COMPUTE_TEST_INFO("Device does not support fp16 vector operations. Test SKIPPED."); + framework::ARM_COMPUTE_PRINT_INFO(); + } } FIXTURE_DATA_TEST_CASE(RunLarge, @@ -85,11 +92,18 @@ FIXTURE_DATA_TEST_CASE(RunLarge, framework::DatasetMode::NIGHTLY, combine(datasets::LargeStridedSliceDataset(), framework::dataset::make("DataType", DataType::F16))) { - // Validate output - validate(Accessor(_target), _reference); + if(CPUInfo::get().has_fp16()) + { + validate(Accessor(_target), _reference); + } + else + { + ARM_COMPUTE_TEST_INFO("Device does not support fp16 vector operations. 
Test SKIPPED."); + framework::ARM_COMPUTE_PRINT_INFO(); + } } TEST_SUITE_END() // FP16 -#endif /* __ARM_FEATURE_FP16_VECTOR_ARITHMETIC */ +#endif /* ARM_COMPUTE_ENABLE_FP16 */ TEST_SUITE(FP32) FIXTURE_DATA_TEST_CASE(RunSmall, diff --git a/tests/validation/NEON/Unstack.cpp b/tests/validation/NEON/Unstack.cpp index 3e8f1ff324..18e778b9fd 100644 --- a/tests/validation/NEON/Unstack.cpp +++ b/tests/validation/NEON/Unstack.cpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2018-2021 Arm Limited. + * Copyright (c) 2018-2021, 2024 Arm Limited. * * SPDX-License-Identifier: MIT * @@ -95,19 +95,28 @@ FIXTURE_DATA_TEST_CASE(RunSmall, NEUnstackFixture, framework::DatasetMode } TEST_SUITE_END() // F32 -#ifdef __ARM_FEATURE_FP16_VECTOR_ARITHMETIC +#ifdef ARM_COMPUTE_ENABLE_FP16 TEST_SUITE(F16) FIXTURE_DATA_TEST_CASE(RunSmall, NEUnstackFixture, framework::DatasetMode::PRECOMMIT, unstack_dataset_small * framework::dataset::make("DataType", { DataType::F16 })) { ARM_COMPUTE_ERROR_ON(_target.size() != _reference.size()); - // Validate output - for(size_t k = 0; k < _target.size(); ++k) + + if(CPUInfo::get().has_fp16()) { - validate(Accessor(_target[k]), _reference[k]); + // Validate output + for(size_t k = 0; k < _target.size(); ++k) + { + validate(Accessor(_target[k]), _reference[k]); + } + } + else + { + ARM_COMPUTE_TEST_INFO("Device does not support fp16 vector operations. 
Test SKIPPED."); + framework::ARM_COMPUTE_PRINT_INFO(); } } TEST_SUITE_END() // F16 -#endif /* __ARM_FEATURE_FP16_VECTOR_ARITHMETIC */ +#endif /* ARM_COMPUTE_ENABLE_FP16 */ TEST_SUITE(Quantized) FIXTURE_DATA_TEST_CASE(RunSmall, NEUnstackFixture, framework::DatasetMode::PRECOMMIT, unstack_dataset_small * framework::dataset::make("DataType", { DataType::QASYMM8 })) diff --git a/tests/validation/fixtures/ActivationLayerFixture.h b/tests/validation/fixtures/ActivationLayerFixture.h index a24ba8913e..d3e8bf09f2 100644 --- a/tests/validation/fixtures/ActivationLayerFixture.h +++ b/tests/validation/fixtures/ActivationLayerFixture.h @@ -50,6 +50,12 @@ class ActivationValidationGenericFixture : public framework::Fixture void setup(TensorShape shape, bool in_place, ActivationLayerInfo::ActivationFunction function, float alpha_beta, DataType data_type, QuantizationInfo quantization_info) { + if(std::is_same::value && // Cpu + data_type == DataType::F16 && !CPUInfo::get().has_fp16()) + { + return; + } + ActivationLayerInfo info(function, alpha_beta, alpha_beta); _in_place = in_place; diff --git a/tests/validation/fixtures/AddMulAddFixture.h b/tests/validation/fixtures/AddMulAddFixture.h index d13fef2f02..788e1c974f 100644 --- a/tests/validation/fixtures/AddMulAddFixture.h +++ b/tests/validation/fixtures/AddMulAddFixture.h @@ -1,5 +1,5 @@ /* - * Copyright (c) 2023 Arm Limited. + * Copyright (c) 2023-2024 Arm Limited. 
* * SPDX-License-Identifier: MIT * @@ -144,8 +144,15 @@ class AddMulAddFloatValidationFixture : public AddMulAddGenericFixture::value; + const bool is_not_fp16 = data_type != DataType::F16; + const bool device_has_fp16 = CPUInfo::get().has_fp16(); + + if(is_not_cpu || is_not_fp16 || device_has_fp16) + { + Parent::setup(shape, data_type, act_info, interm_out); + compute_reference(shape, data_type, act_info); + } } // Compute Reference is moved outside of the generic fixture because with the quantized data types, @@ -202,6 +209,12 @@ class AddMulAddQuantizedValidationFixture : public AddMulAddGenericFixture::value && // Cpu + data_type == DataType::F16 && !CPUInfo::get().has_fp16()) + { + return; + } + // Quantization arguments moved to class attributes to prevent long function declerations Parent::_input1_qinfo = input1_qinfo; Parent::_input2_qinfo = input2_qinfo; diff --git a/tests/validation/fixtures/ArgMinMaxFixture.h b/tests/validation/fixtures/ArgMinMaxFixture.h index 7a823568a8..884b19260a 100644 --- a/tests/validation/fixtures/ArgMinMaxFixture.h +++ b/tests/validation/fixtures/ArgMinMaxFixture.h @@ -1,5 +1,5 @@ /* - * Copyright (c) 2018-2023 Arm Limited. + * Copyright (c) 2018-2024 Arm Limited. * * SPDX-License-Identifier: MIT * @@ -21,8 +21,8 @@ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE * SOFTWARE. 
*/ -#ifndef ARM_COMPUTE_TEST_ARG_MIN_MAX_FIXTURE -#define ARM_COMPUTE_TEST_ARG_MIN_MAX_FIXTURE +#ifndef ACL_TESTS_VALIDATION_FIXTURES_ARGMINMAXFIXTURE_H +#define ACL_TESTS_VALIDATION_FIXTURES_ARGMINMAXFIXTURE_H #include "arm_compute/core/TensorShape.h" #include "arm_compute/core/Types.h" @@ -48,6 +48,12 @@ class ArgMinMaxValidationBaseFixture : public framework::Fixture public: void setup(TensorShape shape, DataType input_type, DataType output_type, int axis, ReductionOperation op, QuantizationInfo q_info) { + if(std::is_same::value && // Cpu + input_type == DataType::F16 && !CPUInfo::get().has_fp16()) + { + return; + } + _target = compute_target(shape, input_type, output_type, axis, op, q_info); _reference = compute_reference(shape, input_type, output_type, axis, op, q_info); } @@ -168,4 +174,4 @@ class ArgMinMaxValidationFixture : public ArgMinMaxValidationBaseFixture::value && // Cpu + data_type == DataType::F16 && !CPUInfo::get().has_fp16()) + { + return; + } + _op = op; _act_info = act_info; _is_inplace = is_inplace; @@ -284,4 +290,4 @@ class ArithmeticSubtractionValidationQuantizedBroadcastFixture : public Arithmet } // namespace validation } // namespace test } // namespace arm_compute -#endif /* ARM_COMPUTE_TEST_ARITHMETIC_OPERATIONS_FIXTURE */ +#endif // ACL_TESTS_VALIDATION_FIXTURES_ARITHMETICOPERATIONSFIXTURE_H diff --git a/tests/validation/fixtures/BatchNormalizationLayerFixture.h b/tests/validation/fixtures/BatchNormalizationLayerFixture.h index 54a0ed9e09..2374ecf64a 100644 --- a/tests/validation/fixtures/BatchNormalizationLayerFixture.h +++ b/tests/validation/fixtures/BatchNormalizationLayerFixture.h @@ -1,5 +1,5 @@ /* - * Copyright (c) 2017-2021, 2023 Arm Limited. + * Copyright (c) 2017-2021, 2023-2024 Arm Limited. * * SPDX-License-Identifier: MIT * @@ -21,8 +21,8 @@ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE * SOFTWARE. 
*/ -#ifndef ARM_COMPUTE_TEST_BATCH_NORMALIZATION_LAYER_FIXTURE -#define ARM_COMPUTE_TEST_BATCH_NORMALIZATION_LAYER_FIXTURE +#ifndef ACL_TESTS_VALIDATION_FIXTURES_BATCHNORMALIZATIONLAYERFIXTURE_H +#define ACL_TESTS_VALIDATION_FIXTURES_BATCHNORMALIZATIONLAYERFIXTURE_H #include "arm_compute/core/TensorShape.h" #include "arm_compute/core/Types.h" @@ -46,10 +46,15 @@ class BatchNormalizationLayerValidationFixture : public framework::Fixture public: void setup(TensorShape shape0, TensorShape shape1, float epsilon, bool use_beta, bool use_gamma, ActivationLayerInfo act_info, DataType dt, DataLayout data_layout) { + if(std::is_same::value && // Cpu + dt == DataType::F16 && !CPUInfo::get().has_fp16()) + { + return; + } + _data_type = dt; _use_beta = use_beta; _use_gamma = use_gamma; - _target = compute_target(shape0, shape1, epsilon, act_info, dt, data_layout); _reference = compute_reference(shape0, shape1, epsilon, act_info, dt); } @@ -165,4 +170,4 @@ class BatchNormalizationLayerValidationFixture : public framework::Fixture } // namespace validation } // namespace test } // namespace arm_compute -#endif /* ARM_COMPUTE_TEST_BATCH_NORMALIZATION_LAYER_FIXTURE */ +#endif // ACL_TESTS_VALIDATION_FIXTURES_BATCHNORMALIZATIONLAYERFIXTURE_H diff --git a/tests/validation/fixtures/BoundingBoxTransformFixture.h b/tests/validation/fixtures/BoundingBoxTransformFixture.h index 03edaeab16..84576335b0 100644 --- a/tests/validation/fixtures/BoundingBoxTransformFixture.h +++ b/tests/validation/fixtures/BoundingBoxTransformFixture.h @@ -1,5 +1,5 @@ /* - * Copyright (c) 2018-2021, 2023 Arm Limited. + * Copyright (c) 2018-2021, 2023-2024 Arm Limited. * * SPDX-License-Identifier: MIT * @@ -21,8 +21,8 @@ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE * SOFTWARE. 
*/ -#ifndef ARM_COMPUTE_TEST_BOUNDINGBOXTRANSFORM_FIXTURE -#define ARM_COMPUTE_TEST_BOUNDINGBOXTRANSFORM_FIXTURE +#ifndef ACL_TESTS_VALIDATION_FIXTURES_BOUNDINGBOXTRANSFORMFIXTURE_H +#define ACL_TESTS_VALIDATION_FIXTURES_BOUNDINGBOXTRANSFORMFIXTURE_H #include "arm_compute/core/TensorShape.h" #include "arm_compute/core/Types.h" @@ -104,6 +104,12 @@ class BoundingBoxTransformGenericFixture : public framework::Fixture void setup(TensorShape deltas_shape, const BoundingBoxTransformInfo &info, DataType data_type, QuantizationInfo deltas_qinfo) { + if(std::is_same::value && // Cpu + data_type == DataType::F16 && !CPUInfo::get().has_fp16()) + { + return; + } + const bool is_qasymm16 = data_type == DataType::QASYMM16; _data_type_deltas = (is_qasymm16) ? DataType::QASYMM8 : data_type; _boxes_qinfo = (is_qasymm16) ? QuantizationInfo(.125f, 0) : QuantizationInfo(); @@ -234,4 +240,4 @@ class BoundingBoxTransformQuantizedFixture : public BoundingBoxTransformGenericF } // namespace validation } // namespace test } // namespace arm_compute -#endif /* ARM_COMPUTE_TEST_BOUNDINGBOXTRANSFORM_FIXTURE */ +#endif // ACL_TESTS_VALIDATION_FIXTURES_BOUNDINGBOXTRANSFORMFIXTURE_H diff --git a/tests/validation/fixtures/CastFixture.h b/tests/validation/fixtures/CastFixture.h index e9d624e6f3..8297ec81dc 100644 --- a/tests/validation/fixtures/CastFixture.h +++ b/tests/validation/fixtures/CastFixture.h @@ -1,5 +1,5 @@ /* - * Copyright (c) 2018-2021, 2023 Arm Limited. + * Copyright (c) 2018-2021, 2023-2024 Arm Limited. * * SPDX-License-Identifier: MIT * @@ -21,8 +21,8 @@ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE * SOFTWARE. 
*/ -#ifndef ARM_COMPUTE_TEST_CAST_FIXTURE -#define ARM_COMPUTE_TEST_CAST_FIXTURE +#ifndef ACL_TESTS_VALIDATION_FIXTURES_CASTFIXTURE_H +#define ACL_TESTS_VALIDATION_FIXTURES_CASTFIXTURE_H #include "tests/validation/fixtures/DepthConvertLayerFixture.h" @@ -38,6 +38,12 @@ class CastValidationFixture : public framework::Fixture public: void setup(TensorShape shape, DataType dt_in, DataType dt_out, ConvertPolicy policy) { + if(std::is_same::value && // Cpu + (dt_in == DataType::F16 || dt_out == DataType::F16) && !CPUInfo::get().has_fp16()) + { + return; + } + _target = compute_target(shape, dt_in, dt_out, policy); _reference = compute_reference(shape, dt_in, dt_out, policy); } @@ -151,4 +157,4 @@ class CastValidationFixture : public framework::Fixture } // namespace validation } // namespace test } // namespace arm_compute -#endif /* ARM_COMPUTE_TEST_CAST_FIXTURE */ +#endif // ACL_TESTS_VALIDATION_FIXTURES_CASTFIXTURE_H diff --git a/tests/validation/fixtures/ChannelShuffleLayerFixture.h b/tests/validation/fixtures/ChannelShuffleLayerFixture.h index 530dba3893..63dfd62751 100644 --- a/tests/validation/fixtures/ChannelShuffleLayerFixture.h +++ b/tests/validation/fixtures/ChannelShuffleLayerFixture.h @@ -1,5 +1,5 @@ /* - * Copyright (c) 2018-2021, 2023 Arm Limited. + * Copyright (c) 2018-2021, 2023-2024 Arm Limited. * * SPDX-License-Identifier: MIT * @@ -21,8 +21,8 @@ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE * SOFTWARE. 
*/ -#ifndef ARM_COMPUTE_TEST_CHANNEL_SHUFFLE_FIXTURE -#define ARM_COMPUTE_TEST_CHANNEL_SHUFFLE_FIXTURE +#ifndef ACL_TESTS_VALIDATION_FIXTURES_CHANNELSHUFFLELAYERFIXTURE_H +#define ACL_TESTS_VALIDATION_FIXTURES_CHANNELSHUFFLELAYERFIXTURE_H #include "arm_compute/core/Helpers.h" #include "arm_compute/core/TensorShape.h" @@ -47,6 +47,12 @@ class ChannelShuffleLayerValidationFixture : public framework::Fixture public: void setup(TensorShape shape, unsigned int num_groups, DataType data_type, DataLayout data_layout) { + if(std::is_same::value && // Cpu + data_type == DataType::F16 && !CPUInfo::get().has_fp16()) + { + return; + } + _target = compute_target(shape, data_type, num_groups, data_layout); _reference = compute_reference(shape, data_type, num_groups); } @@ -110,4 +116,4 @@ class ChannelShuffleLayerValidationFixture : public framework::Fixture } // namespace validation } // namespace test } // namespace arm_compute -#endif /* ARM_COMPUTE_TEST_CHANNEL_SHUFFLE_FIXTURE */ +#endif // ACL_TESTS_VALIDATION_FIXTURES_CHANNELSHUFFLELAYERFIXTURE_H diff --git a/tests/validation/fixtures/ComparisonFixture.h b/tests/validation/fixtures/ComparisonFixture.h index f25d5abb73..b7c94e1c8a 100644 --- a/tests/validation/fixtures/ComparisonFixture.h +++ b/tests/validation/fixtures/ComparisonFixture.h @@ -1,5 +1,5 @@ /* - * Copyright (c) 2018-2021, 2023 Arm Limited. + * Copyright (c) 2018-2021, 2023-2024 Arm Limited. * * SPDX-License-Identifier: MIT * @@ -21,8 +21,8 @@ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE * SOFTWARE. 
*/ -#ifndef ARM_COMPUTE_TEST_COMPARISON_FIXTURE -#define ARM_COMPUTE_TEST_COMPARISON_FIXTURE +#ifndef ACL_TESTS_VALIDATION_FIXTURES_COMPARISONFIXTURE_H +#define ACL_TESTS_VALIDATION_FIXTURES_COMPARISONFIXTURE_H #include "arm_compute/core/TensorShape.h" #include "arm_compute/core/Types.h" @@ -46,6 +46,12 @@ class ComparisonValidationGenericFixture : public framework::Fixture public: void setup(ComparisonOperation op, const TensorShape &shape0, const TensorShape &shape1, DataType data_type, QuantizationInfo qinfo0, QuantizationInfo qinfo1) { + if(std::is_same::value && // Cpu + data_type == DataType::F16 && !CPUInfo::get().has_fp16()) + { + return; + } + _target = compute_target(op, shape0, shape1, data_type, qinfo0, qinfo1); _reference = compute_reference(op, shape0, shape1, data_type, qinfo0, qinfo1); } @@ -155,4 +161,4 @@ class ComparisonQuantizedBroadcastValidationFixture : public ComparisonValidatio } // namespace validation } // namespace test } // namespace arm_compute -#endif /* ARM_COMPUTE_TEST_COMPARISON_FIXTURE */ +#endif // ACL_TESTS_VALIDATION_FIXTURES_COMPARISONFIXTURE_H diff --git a/tests/validation/fixtures/ComputeAllAnchorsFixture.h b/tests/validation/fixtures/ComputeAllAnchorsFixture.h index 620f1b53fa..a0e712e567 100644 --- a/tests/validation/fixtures/ComputeAllAnchorsFixture.h +++ b/tests/validation/fixtures/ComputeAllAnchorsFixture.h @@ -1,5 +1,5 @@ /* - * Copyright (c) 2019-2021, 2023 Arm Limited. + * Copyright (c) 2019-2021, 2023-2024 Arm Limited. * * SPDX-License-Identifier: MIT * @@ -21,8 +21,8 @@ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE * SOFTWARE. 
*/ -#ifndef ARM_COMPUTE_TEST_COMPUTEALLANCHORS_FIXTURE -#define ARM_COMPUTE_TEST_COMPUTEALLANCHORS_FIXTURE +#ifndef ACL_TESTS_VALIDATION_FIXTURES_COMPUTEALLANCHORSFIXTURE_H +#define ACL_TESTS_VALIDATION_FIXTURES_COMPUTEALLANCHORSFIXTURE_H #include "arm_compute/core/TensorShape.h" #include "arm_compute/core/Types.h" @@ -46,6 +46,12 @@ class ComputeAllAnchorsGenericFixture : public framework::Fixture public: void setup(size_t num_anchors, const ComputeAnchorsInfo &info, DataType data_type, QuantizationInfo qinfo) { + if(std::is_same::value && // Cpu + data_type == DataType::F16 && !CPUInfo::get().has_fp16()) + { + return; + } + _target = compute_target(num_anchors, data_type, info, qinfo); _reference = compute_reference(num_anchors, data_type, info, qinfo); } @@ -124,4 +130,4 @@ class ComputeAllAnchorsQuantizedFixture : public ComputeAllAnchorsGenericFixture } // namespace validation } // namespace test } // namespace arm_compute -#endif /* ARM_COMPUTE_TEST_COMPUTEALLANCHORS_FIXTURE */ +#endif // ACL_TESTS_VALIDATION_FIXTURES_COMPUTEALLANCHORSFIXTURE_H diff --git a/tests/validation/fixtures/ConcatenateLayerFixture.h b/tests/validation/fixtures/ConcatenateLayerFixture.h index 3a021661ac..dab055b7b9 100644 --- a/tests/validation/fixtures/ConcatenateLayerFixture.h +++ b/tests/validation/fixtures/ConcatenateLayerFixture.h @@ -1,5 +1,5 @@ /* - * Copyright (c) 2018-2021, 2023 Arm Limited. + * Copyright (c) 2018-2021, 2023-2024 Arm Limited. * * SPDX-License-Identifier: MIT * @@ -21,8 +21,8 @@ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE * SOFTWARE. 
*/ -#ifndef ARM_COMPUTE_TEST_WIDTHCONCATENATE_LAYER_FIXTURE -#define ARM_COMPUTE_TEST_WIDTHCONCATENATE_LAYER_FIXTURE +#ifndef ACL_TESTS_VALIDATION_FIXTURES_CONCATENATELAYERFIXTURE_H +#define ACL_TESTS_VALIDATION_FIXTURES_CONCATENATELAYERFIXTURE_H #include "arm_compute/core/TensorShape.h" #include "arm_compute/core/Types.h" @@ -52,6 +52,12 @@ class ConcatenateLayerValidationFixture : public framework::Fixture public: void setup(TensorShape shape, DataType data_type, unsigned int axis) { + if(std::is_same::value && // Cpu + data_type == DataType::F16 && !CPUInfo::get().has_fp16()) + { + return; + } + // Create input shapes std::mt19937 gen(library->seed()); std::uniform_int_distribution<> num_dis(2, 8); @@ -170,4 +176,4 @@ class ConcatenateLayerValidationFixture : public framework::Fixture } // namespace validation } // namespace test } // namespace arm_compute -#endif /* ARM_COMPUTE_TEST_WIDTHCONCATENATE_LAYER_FIXTURE */ +#endif // ACL_TESTS_VALIDATION_FIXTURES_CONCATENATELAYERFIXTURE_H diff --git a/tests/validation/fixtures/ConvertFullyConnectedWeightsFixture.h b/tests/validation/fixtures/ConvertFullyConnectedWeightsFixture.h index 7ad14e1b40..5e2f9a9c3d 100644 --- a/tests/validation/fixtures/ConvertFullyConnectedWeightsFixture.h +++ b/tests/validation/fixtures/ConvertFullyConnectedWeightsFixture.h @@ -1,5 +1,5 @@ /* - * Copyright (c) 2018-2023 Arm Limited. + * Copyright (c) 2018-2024 Arm Limited. * * SPDX-License-Identifier: MIT * @@ -21,8 +21,8 @@ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE * SOFTWARE. 
*/ -#ifndef ARM_COMPUTE_TEST_CONVERT_FULLY_CONNECTED_WEIGHTS_FIXTURE -#define ARM_COMPUTE_TEST_CONVERT_FULLY_CONNECTED_WEIGHTS_FIXTURE +#ifndef ACL_TESTS_VALIDATION_FIXTURES_CONVERTFULLYCONNECTEDWEIGHTSFIXTURE_H +#define ACL_TESTS_VALIDATION_FIXTURES_CONVERTFULLYCONNECTEDWEIGHTSFIXTURE_H #include "arm_compute/core/TensorShape.h" #include "arm_compute/core/Types.h" @@ -45,6 +45,12 @@ class ConvertFullyConnectedWeightsValidationFixture : public framework::Fixture public: void setup(TensorShape input_shape, unsigned int weights_w, DataLayout training_data_layout, DataType data_type) { + if(std::is_same::value && // Cpu + data_type == DataType::F16 && !CPUInfo::get().has_fp16()) + { + return; + } + const unsigned int height = input_shape.x() * input_shape.y() * input_shape.z(); const TensorShape weights_shape(weights_w, height); @@ -128,4 +134,4 @@ class ConvertFullyConnectedWeightsValidationFixture : public framework::Fixture } // namespace validation } // namespace test } // namespace arm_compute -#endif /* ARM_COMPUTE_TEST_CONVERT_FULLY_CONNECTED_WEIGHTS_FIXTURE */ +#endif // ACL_TESTS_VALIDATION_FIXTURES_CONVERTFULLYCONNECTEDWEIGHTSFIXTURE_H diff --git a/tests/validation/fixtures/ConvolutionLayerFixture.h b/tests/validation/fixtures/ConvolutionLayerFixture.h index 0622e5e6f0..61a79bd0fb 100644 --- a/tests/validation/fixtures/ConvolutionLayerFixture.h +++ b/tests/validation/fixtures/ConvolutionLayerFixture.h @@ -1,5 +1,5 @@ /* - * Copyright (c) 2017-2023 Arm Limited. + * Copyright (c) 2017-2024 Arm Limited. 
* * SPDX-License-Identifier: MIT * @@ -125,6 +125,12 @@ class ConvolutionValidationGenericFixture : public framework::Fixture DataType data_type, DataType weights_data_type, DataLayout data_layout, QuantizationInfo quantization_info, QuantizationInfo weight_quantization_info, ActivationLayerInfo act_info, bool mixed_layout = false, PaddingList pre_pad_layer = PaddingList({}), bool padded_weights = false) { + if(std::is_same::value && // Cpu + (data_type == DataType::F16 || weights_data_type == DataType::F16) && !CPUInfo::get().has_fp16()) + { + return; + } + // This hash is used by random generators. There may be hash collisions but // this is intentional as it's a very easy way to make the the current // random generation process almost different for many test configurations, @@ -204,7 +210,10 @@ class ConvolutionValidationGenericFixture : public framework::Fixture { if(_use_dynamic_output_quant) { - std::uniform_int_distribution distribution(-128, 127); + // Using -127 as the lower bound because of possible overflow. + // This is a known issue and reported in the errata. 
+ // See COMPMID-7109 for more details + std::uniform_int_distribution distribution(-127, 127); library->fill(tensor, distribution, i); } else @@ -597,6 +606,12 @@ class VariableWeightsFixtureBaseClass : public framework::Fixture void setup(TensorShape input_shape, TensorShape weights_shape, TensorShape bias_shape, TensorShape output_shape, PadStrideInfo info, Size2D dilation, DataLayout data_layout, const DataType data_type) { + if(std::is_same::value && // Cpu + data_type == DataType::F16 && !CPUInfo::get().has_fp16()) + { + return; + } + conv = std::make_unique(); // prepare data _data_layout = data_layout; @@ -783,6 +798,12 @@ class HasOptImplFixture : public framework::Fixture public: void setup(DataType data_type, arm_compute::WeightFormat query_weight_format) { + if(std::is_same::value && // Cpu + data_type == DataType::F16 && !CPUInfo::get().has_fp16()) + { + return; + } + auto conv = std::make_unique(); const auto src_info = TensorInfo(TensorShape(56U, 56U, 64U), 1, data_type, DataLayout::NHWC); const auto weight_info = TensorInfo(TensorShape(64, 3U, 3U, 64U), 1, enable_fast_math ? DataType::BFLOAT16 : data_type, DataLayout::NHWC); diff --git a/tests/validation/fixtures/CpuActivationFixture.h b/tests/validation/fixtures/CpuActivationFixture.h new file mode 100644 index 0000000000..9e05db969a --- /dev/null +++ b/tests/validation/fixtures/CpuActivationFixture.h @@ -0,0 +1,200 @@ +/* + * Copyright (c) 2024 Arm Limited. 
+ * + * SPDX-License-Identifier: MIT + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal in the Software without restriction, including without limitation the + * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + * sell copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. 
+ */ +#ifndef ACL_TESTS_VALIDATION_FIXTURES_CPUACTIVATIONFIXTURE_H +#define ACL_TESTS_VALIDATION_FIXTURES_CPUACTIVATIONFIXTURE_H + +#include "arm_compute/core/TensorShape.h" +#include "arm_compute/core/Types.h" +#include "tests/AssetsLibrary.h" +#include "tests/Globals.h" +#include "tests/IAccessor.h" +#include "tests/framework/Asserts.h" +#include "tests/framework/Fixture.h" +#include "tests/validation/Helpers.h" +#include "tests/validation/reference/ActivationLayer.h" + +#include + +namespace arm_compute +{ +namespace test +{ +namespace validation +{ +template +class CpuActivationValidationGenericFixture : public framework::Fixture +{ +public: + + void setup(TensorShape shape, bool in_place, ActivationLayerInfo::ActivationFunction function, float alpha_beta, DataType data_type, QuantizationInfo quantization_info) + { + ActivationLayerInfo info(function, alpha_beta, alpha_beta); + + _in_place = in_place; + _data_type = data_type; + // We are only testing fp32 datatype for CpuActivation wrapper. Hence, + // we can ignore quantization_info here and just use the default one. + _output_quantization_info = quantization_info; + _input_quantization_info = quantization_info; + + _function = function; + _target = compute_target(shape, info); + _reference = compute_reference(shape, info); + } + +protected: + std::vector get_boundary_values(T min, T max) + { + // This function will return a vector filled with the following values that can + // represent two partitions derived from equivalence partitioning. + // * Lower partition: min, min + delta, lower quarter (nominal), center - delta + // * Upper partition: center, center + delta, upper quarter (nominal), max - delta, max + const auto delta = is_data_type_float(_data_type) ? 
T(0.1f) : T(1); + const auto center_value = (min + max) / 2; + const auto lower_quarter = (min + center_value) / 2; + const auto upper_quarter = (center_value + max) / 2; + + std::vector boundary_values{}; + + // To ensure all the inserted values are within the given range after subtracting/adding delta + auto insert_values = [&boundary_values, &min, &max](const std::initializer_list &new_values) + { + for(auto &v : new_values) + { + if(v >= min && v <= max) + { + boundary_values.emplace_back(v); + } + } + }; + + insert_values({ min, static_cast(min + delta), static_cast(lower_quarter), static_cast(center_value - delta) }); // lower partition + insert_values({ static_cast(center_value), static_cast(center_value + delta), static_cast(upper_quarter), static_cast(max - delta), max }); // upper partition + + return boundary_values; + } + + template + void fill(U &&tensor) + { + if(is_data_type_float(_data_type)) + { + float min_bound = 0; + float max_bound = 0; + std::tie(min_bound, max_bound) = get_activation_layer_test_bounds(_function, _data_type); + library->fill_static_values(tensor, get_boundary_values(static_cast(min_bound), static_cast(max_bound))); + } + else + { + PixelValue min{}; + PixelValue max{}; + std::tie(min, max) = get_min_max(tensor.data_type()); + library->fill_static_values(tensor, get_boundary_values(min.get(), max.get())); + } + } + + TensorType compute_target(const TensorShape &shape, ActivationLayerInfo info) + { + // Create tensors + TensorType src = create_tensor(shape, _data_type, 1, _input_quantization_info, DataLayout::NCHW); + TensorType dst = create_tensor(shape, _data_type, 1, _output_quantization_info, DataLayout::NCHW); + + // Create and configure function + FunctionType act_layer; + + TensorType *dst_ptr = _in_place ? 
&src : &dst; + + if(!_in_place) + { + act_layer.configure(src.info(), dst.info(), info); + } + else { + act_layer.configure(src.info(), nullptr, info); + } + + ARM_COMPUTE_ASSERT(src.info()->is_resizable()); + ARM_COMPUTE_ASSERT(dst.info()->is_resizable()); + + // Allocate tensors + src.allocator()->allocate(); + ARM_COMPUTE_ASSERT(!src.info()->is_resizable()); + + if(!_in_place) + { + dst.allocator()->allocate(); + ARM_COMPUTE_ASSERT(!dst.info()->is_resizable()); + } + + // Fill tensors + fill(AccessorType(src)); + + // Compute function + ITensorPack run_pack{ { arm_compute::TensorType::ACL_SRC, &src }, { arm_compute::TensorType::ACL_DST, dst_ptr } }; + act_layer.run(run_pack); + + if(_in_place) + { + return src; + } + else + { + return dst; + } + } + + SimpleTensor compute_reference(const TensorShape &shape, ActivationLayerInfo info) + { + // Create reference + SimpleTensor src{ shape, _data_type, 1, _input_quantization_info }; + + // Fill reference + fill(src); + + return reference::activation_layer(src, info, _output_quantization_info); + } + +protected: + TensorType _target{}; + SimpleTensor _reference{}; + bool _in_place{}; + QuantizationInfo _input_quantization_info{}; + QuantizationInfo _output_quantization_info{}; + DataType _data_type{}; + ActivationLayerInfo::ActivationFunction _function{}; +}; + +template +class CpuActivationValidationFixture : public CpuActivationValidationGenericFixture +{ +public: + void setup(TensorShape shape, bool in_place, ActivationLayerInfo::ActivationFunction function, float alpha_beta, DataType data_type) + { + CpuActivationValidationGenericFixture::setup(shape, in_place, function, alpha_beta, data_type, QuantizationInfo()); + } +}; + +} // namespace validation +} // namespace test +} // namespace arm_compute +#endif // ACL_TESTS_VALIDATION_FIXTURES_CPUACTIVATIONFIXTURE_H diff --git a/tests/validation/fixtures/CpuGemmConv2dFixture.h b/tests/validation/fixtures/CpuGemmConv2dFixture.h new file mode 100644 index 
0000000000..c8e82fb8a0 --- /dev/null +++ b/tests/validation/fixtures/CpuGemmConv2dFixture.h @@ -0,0 +1,165 @@ +/* + * Copyright (c) 2024 Arm Limited. + * + * SPDX-License-Identifier: MIT + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal in the Software without restriction, including without limitation the + * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + * sell copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. 
+ */ + +#ifndef ACL_TESTS_VALIDATION_FIXTURES_CPUGEMMCONV2DFIXTURE_H +#define ACL_TESTS_VALIDATION_FIXTURES_CPUGEMMCONV2DFIXTURE_H + +#include "arm_compute/core/experimental/Types.h" +#include "arm_compute/core/QuantizationInfo.h" +#include "arm_compute/core/TensorShape.h" +#include "arm_compute/core/Types.h" +#include "arm_compute/function_info/ActivationLayerInfo.h" +#include "arm_compute/graph/Utils.h" + +#include "src/core/helpers/MemoryHelpers.h" +#include "tests/AssetsLibrary.h" +#include "tests/framework/Asserts.h" +#include "tests/framework/Fixture.h" +#include "tests/Globals.h" +#include "tests/IAccessor.h" +#include "tests/validation/Helpers.h" +#include "tests/validation/reference/ConvolutionLayer.h" +#include "tests/validation/reference/Utils.h" + +namespace arm_compute +{ +namespace test +{ +namespace validation +{ + +template +class CpuGemmConv2dValidationFixture : public framework::Fixture +{ +public: + void setup(TensorShape input_shape, + TensorShape weights_shape, + TensorShape bias_shape, + TensorShape output_shape, + PadStrideInfo info, + Size2D dilation) + { + _dilation = dilation; + _hash = input_shape[0] + input_shape[1] + input_shape[2] + input_shape[3] + weights_shape[0] + + weights_shape[1] + weights_shape[2] + weights_shape[3]; + _target = compute_target(input_shape, weights_shape, bias_shape, output_shape, info); + _reference = compute_reference(input_shape, weights_shape, bias_shape, output_shape, info); + } + +protected: + template + void fill(U &&tensor, int i) + { + std::uniform_real_distribution distribution(-1.0f, 1.0f); + library->fill(tensor, distribution, i); + } + + TensorType compute_target(TensorShape input_shape, + TensorShape weights_shape, + const TensorShape &bias_shape, + TensorShape output_shape, + const PadStrideInfo &info) + { + // We need to permute to the same layout that the reference impl needs. 
+ permute(input_shape, PermutationVector(2U, 0U, 1U)); + permute(weights_shape, PermutationVector(2U, 0U, 1U)); + permute(output_shape, PermutationVector(2U, 0U, 1U)); + + const auto src_info = TensorInfo(input_shape, 1, DataType::F32, _data_layout); + const auto weights_info = TensorInfo(weights_shape, 1, DataType::F32, _data_layout); + const auto biases_info = TensorInfo(bias_shape, 1, DataType::F32, _data_layout); + auto dst_info = TensorInfo(output_shape, 1, DataType::F32, _data_layout); + + auto conv = std::make_unique(); + conv->configure(&src_info, &weights_info, &biases_info, &dst_info, info); + ARM_COMPUTE_ASSERT(conv->validate(&src_info, &weights_info, &biases_info, &dst_info, info)); + + // Create tensors + auto src = create_tensor(src_info); + auto weights = create_tensor(weights_info); + auto biases = create_tensor(biases_info); + auto dst = create_tensor(dst_info); + + // Allocate tensors + src.allocator()->allocate(); + weights.allocator()->allocate(); + biases.allocator()->allocate(); + dst.allocator()->allocate(); + + ITensorPack run_pack{{arm_compute::TensorType::ACL_SRC_0, &src}, + {arm_compute::TensorType::ACL_SRC_1, &weights}, + {arm_compute::TensorType::ACL_SRC_2, &biases}, + {arm_compute::TensorType::ACL_DST, &dst}}; + ITensorPack prep_pack{{arm_compute::TensorType::ACL_SRC_1, &weights}, + {arm_compute::TensorType::ACL_SRC_2, &biases}}; + + auto const aux_mem_req = conv->workspace(); + auto mg = MemoryGroup{}; + auto ws = manage_workspace(aux_mem_req, mg, run_pack, prep_pack); + + // Fill tensors + fill(AccessorType(src), 0 + _hash); + fill(AccessorType(weights), 1 + _hash); + fill(AccessorType(biases), 2 + _hash); + + conv->prepare(prep_pack); + conv->run(run_pack); + + src.allocator()->free(); + weights.allocator()->free(); + biases.allocator()->free(); + + return dst; + } + + SimpleTensor compute_reference(const TensorShape &input_shape, + const TensorShape &weights_shape, + const TensorShape &bias_shape, + const TensorShape &output_shape, 
+ const PadStrideInfo &info) + { + // Create reference + SimpleTensor src{input_shape, DataType::F32}; + SimpleTensor weights{weights_shape, DataType::F32}; + SimpleTensor bias{bias_shape, DataType::F32}; + + fill(src, 0 + _hash); + fill(weights, 1 + _hash); + fill(bias, 2 + _hash); + + return reference::convolution_layer(src, weights, bias, output_shape, info, _dilation); + } + + TensorType _target{}; + SimpleTensor _reference{}; + Size2D _dilation{}; + int32_t _hash{0}; + DataLayout _data_layout{DataLayout::NHWC}; +}; + +} // namespace validation +} // namespace test +} // namespace arm_compute + +#endif // ACL_TESTS_VALIDATION_FIXTURES_CPUGEMMCONV2DFIXTURE_H diff --git a/tests/validation/fixtures/CpuGemmDirectConv2dFixture.h b/tests/validation/fixtures/CpuGemmDirectConv2dFixture.h new file mode 100644 index 0000000000..2e4000117f --- /dev/null +++ b/tests/validation/fixtures/CpuGemmDirectConv2dFixture.h @@ -0,0 +1,166 @@ +/* + * Copyright (c) 2024 Arm Limited. + * + * SPDX-License-Identifier: MIT + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal in the Software without restriction, including without limitation the + * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + * sell copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 
IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + */ + +#ifndef ACL_TESTS_VALIDATION_FIXTURES_CPUGEMMDIRECTCONV2DFIXTURE_H +#define ACL_TESTS_VALIDATION_FIXTURES_CPUGEMMDIRECTCONV2DFIXTURE_H + +#include "arm_compute/core/experimental/Types.h" +#include "arm_compute/core/QuantizationInfo.h" +#include "arm_compute/core/TensorShape.h" +#include "arm_compute/core/Types.h" +#include "arm_compute/function_info/ActivationLayerInfo.h" +#include "arm_compute/graph/Utils.h" + +#include "src/core/helpers/MemoryHelpers.h" +#include "tests/AssetsLibrary.h" +#include "tests/framework/Asserts.h" +#include "tests/framework/Fixture.h" +#include "tests/Globals.h" +#include "tests/IAccessor.h" +#include "tests/validation/Helpers.h" +#include "tests/validation/reference/ConvolutionLayer.h" +#include "tests/validation/reference/Utils.h" + +namespace arm_compute +{ +namespace test +{ +namespace validation +{ + +template +class CpuGemmDirectConv2dValidationFixture : public framework::Fixture +{ +public: + void setup(TensorShape input_shape, + TensorShape weights_shape, + TensorShape bias_shape, + TensorShape output_shape, + PadStrideInfo info, + Size2D dilation) + { + _dilation = dilation; + _hash = input_shape[0] + input_shape[1] + input_shape[2] + input_shape[3] + weights_shape[0] + + weights_shape[1] + weights_shape[2] + weights_shape[3]; + _target = compute_target(input_shape, weights_shape, bias_shape, output_shape, info); + _reference = compute_reference(input_shape, weights_shape, bias_shape, output_shape, info); + } + +protected: + template + void fill(U &&tensor, int i) + { + std::uniform_real_distribution distribution(-1.0f, 1.0f); + library->fill(tensor, distribution, i); + } + + TensorType compute_target(TensorShape input_shape, + TensorShape 
weights_shape, + const TensorShape &bias_shape, + TensorShape output_shape, + const PadStrideInfo &info) + { + // We need to permute to the same layout that the reference impl needs. + permute(input_shape, PermutationVector(2U, 0U, 1U)); + permute(weights_shape, PermutationVector(2U, 0U, 1U)); + permute(output_shape, PermutationVector(2U, 0U, 1U)); + + const auto src_info = TensorInfo(input_shape, 1, DataType::F32, _data_layout); + const auto weights_info = TensorInfo(weights_shape, 1, DataType::F32, _data_layout); + const auto biases_info = TensorInfo(bias_shape, 1, DataType::F32, _data_layout); + auto dst_info = TensorInfo(output_shape, 1, DataType::F32, _data_layout); + const auto conv_info = Conv2dInfo{info, _dilation, ActivationLayerInfo(), false, 1}; + + auto conv = std::make_unique(); + conv->configure(&src_info, &weights_info, &biases_info, &dst_info, conv_info); + ARM_COMPUTE_ASSERT(conv->validate(&src_info, &weights_info, &biases_info, &dst_info, conv_info)); + + // Create tensors + auto src = create_tensor(src_info); + auto weights = create_tensor(weights_info); + auto biases = create_tensor(biases_info); + auto dst = create_tensor(dst_info); + + // Allocate tensors + src.allocator()->allocate(); + weights.allocator()->allocate(); + biases.allocator()->allocate(); + dst.allocator()->allocate(); + + ITensorPack run_pack{{arm_compute::TensorType::ACL_SRC_0, &src}, + {arm_compute::TensorType::ACL_SRC_1, &weights}, + {arm_compute::TensorType::ACL_SRC_2, &biases}, + {arm_compute::TensorType::ACL_DST, &dst}}; + ITensorPack prep_pack{{arm_compute::TensorType::ACL_SRC_1, &weights}, + {arm_compute::TensorType::ACL_SRC_2, &biases}}; + + auto const aux_mem_req = conv->workspace(); + auto mg = MemoryGroup{}; + auto ws = manage_workspace(aux_mem_req, mg, run_pack, prep_pack); + + // Fill tensors + fill(AccessorType(src), 0 + _hash); + fill(AccessorType(weights), 1 + _hash); + fill(AccessorType(biases), 2 + _hash); + + conv->prepare(prep_pack); + conv->run(run_pack); 
+ + src.allocator()->free(); + weights.allocator()->free(); + biases.allocator()->free(); + + return dst; + } + + SimpleTensor compute_reference(const TensorShape &input_shape, + const TensorShape &weights_shape, + const TensorShape &bias_shape, + const TensorShape &output_shape, + const PadStrideInfo &info) + { + // Create reference + SimpleTensor src{input_shape, DataType::F32}; + SimpleTensor weights{weights_shape, DataType::F32}; + SimpleTensor bias{bias_shape, DataType::F32}; + + fill(src, 0 + _hash); + fill(weights, 1 + _hash); + fill(bias, 2 + _hash); + + return reference::convolution_layer(src, weights, bias, output_shape, info, _dilation); + } + + TensorType _target{}; + int32_t _hash{0}; + SimpleTensor _reference{}; + Size2D _dilation{}; + DataLayout _data_layout{DataLayout::NHWC}; +}; + +} // namespace validation +} // namespace test +} // namespace arm_compute + +#endif // ACL_TESTS_VALIDATION_FIXTURES_CPUGEMMDIRECTCONV2DFIXTURE_H diff --git a/tests/validation/fixtures/CpuTransposeFixture.h b/tests/validation/fixtures/CpuTransposeFixture.h new file mode 100644 index 0000000000..4d08334bad --- /dev/null +++ b/tests/validation/fixtures/CpuTransposeFixture.h @@ -0,0 +1,110 @@ +/* + * Copyright (c) 2024 Arm Limited. + * + * SPDX-License-Identifier: MIT + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal in the Software without restriction, including without limitation the + * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + * sell copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software. 
+ * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + */ +#ifndef ACL_TESTS_VALIDATION_FIXTURES_CPUTRANSPOSEFIXTURE_H +#define ACL_TESTS_VALIDATION_FIXTURES_CPUTRANSPOSEFIXTURE_H + +#include "arm_compute/core/TensorShape.h" +#include "arm_compute/core/Types.h" +#include "arm_compute/runtime/Tensor.h" +#include "tests/AssetsLibrary.h" +#include "tests/Globals.h" +#include "tests/IAccessor.h" +#include "tests/framework/Asserts.h" +#include "tests/framework/Fixture.h" +#include "tests/validation/reference/Permute.h" + +namespace arm_compute +{ +namespace test +{ +namespace validation +{ +template +class CpuTransposeValidationFixture : public framework::Fixture +{ +public: + void setup(TensorShape shape, DataType data_type) + { + _target = compute_target(shape, data_type); + _reference = compute_reference(shape, data_type); + } + +protected: + template + void fill(U &&tensor) + { + library->fill_tensor_uniform(tensor, 0); + } + + TensorType compute_target(const TensorShape &shape, DataType data_type) + { + // Make rows the columns of the original shape + TensorShape output_shape{ shape[1], shape[0] }; + + // Create tensors + TensorType src = create_tensor(shape, data_type); + TensorType dst = create_tensor(output_shape, data_type); + + // Create and configure function + FunctionType trans_func; + trans_func.configure(src.info(), dst.info()); + + ARM_COMPUTE_ASSERT(src.info()->is_resizable()); + ARM_COMPUTE_ASSERT(dst.info()->is_resizable()); + + // Allocate tensors + src.allocator()->allocate(); + dst.allocator()->allocate(); + + 
ARM_COMPUTE_ASSERT(!src.info()->is_resizable()); + ARM_COMPUTE_ASSERT(!dst.info()->is_resizable()); + + // Fill tensors + fill(AccessorType(src)); + + // Compute function + ITensorPack run_pack{ { arm_compute::TensorType::ACL_SRC, &src }, { arm_compute::TensorType::ACL_DST, &dst } }; + trans_func.run(run_pack); + + return dst; + } + + SimpleTensor compute_reference(const TensorShape &shape, DataType data_type) + { + // Create reference + SimpleTensor src{ shape, data_type }; + + // Fill reference + fill(src); + + return reference::permute(src, PermutationVector(1U, 0U)); + } + + TensorType _target{}; + SimpleTensor _reference{}; +}; +} // namespace validation +} // namespace test +} // namespace arm_compute +#endif // ACL_TESTS_VALIDATION_FIXTURES_CPUTRANSPOSEFIXTURE_H diff --git a/tests/validation/fixtures/CpuWinogradConv2dFixture.h b/tests/validation/fixtures/CpuWinogradConv2dFixture.h new file mode 100644 index 0000000000..d390aded28 --- /dev/null +++ b/tests/validation/fixtures/CpuWinogradConv2dFixture.h @@ -0,0 +1,211 @@ +/* + * Copyright (c) 2018-2021, 2023-2024 Arm Limited. + * + * SPDX-License-Identifier: MIT + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal in the Software without restriction, including without limitation the + * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + * sell copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 
IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + */ + +#ifndef ACL_TESTS_VALIDATION_FIXTURES_CPUWINOGRADCONV2DFIXTURE_H +#define ACL_TESTS_VALIDATION_FIXTURES_CPUWINOGRADCONV2DFIXTURE_H + +#include "tests/validation/fixtures/WinogradConvolutionLayerFixture.h" + +#include + +namespace arm_compute +{ +namespace test +{ +namespace validation +{ + +template +class CpuWinogradConv2dValidationFixture : public framework::Fixture +{ +public: + void setup(TensorShape input_shape, + TensorShape weights_shape, + TensorShape bias_shape, + TensorShape output_shape, + PadStrideInfo info, + Size2D dilation, + ActivationLayerInfo act_info) + { + ARM_COMPUTE_UNUSED(dilation); + _act_info = act_info; + + _target = compute_target(input_shape, weights_shape, bias_shape, output_shape, info); + _reference = compute_reference(input_shape, weights_shape, bias_shape, info); + } + +protected: + template + void fill(U &&tensor, int i, float min, float max) + { + std::uniform_real_distribution distribution(min, max); + library->fill(tensor, distribution, i); + } + + TensorType compute_target(TensorShape input_shape, + TensorShape weights_shape, + TensorShape bias_shape, + TensorShape output_shape, + const PadStrideInfo &info) + { + permute(input_shape, PermutationVector(2U, 0U, 1U)); + permute(weights_shape, PermutationVector(2U, 0U, 1U)); + permute(output_shape, PermutationVector(2U, 0U, 1U)); + + // Create tensors + TensorType src = create_tensor(input_shape, _data_type, 1, QuantizationInfo(), _data_layout); + TensorType weights = create_tensor(weights_shape, _data_type, 1, QuantizationInfo(), _data_layout); + TensorType bias = create_tensor(bias_shape, _data_type, 1, QuantizationInfo(), _data_layout); + TensorType dst = create_tensor(output_shape, _data_type, 1, 
QuantizationInfo(), _data_layout); + + // Create and configure function + auto conv = std::make_unique(); + ARM_COMPUTE_EXPECT(static_cast(conv->validate(src.info(), weights.info(), bias.info(), dst.info(), info, + _act_info, true)), + framework::LogLevel::ERRORS); + conv->configure(src.info(), weights.info(), bias.info(), dst.info(), info, _act_info, true); + + ARM_COMPUTE_ASSERT(src.info()->is_resizable()); + ARM_COMPUTE_ASSERT(weights.info()->is_resizable()); + ARM_COMPUTE_ASSERT(bias.info()->is_resizable()); + ARM_COMPUTE_ASSERT(dst.info()->is_resizable()); + + add_padding_x({&src, &weights, &bias, &dst}, _data_layout); + + // Allocate tensors + src.allocator()->allocate(); + weights.allocator()->allocate(); + dst.allocator()->allocate(); + bias.allocator()->allocate(); + + ARM_COMPUTE_ASSERT(!src.info()->is_resizable()); + ARM_COMPUTE_ASSERT(!weights.info()->is_resizable()); + ARM_COMPUTE_ASSERT(!bias.info()->is_resizable()); + ARM_COMPUTE_ASSERT(!dst.info()->is_resizable()); + + // Fill tensors + fill(AccessorType(src), 0, -0.5f, 0.5f); + fill(AccessorType(weights), 1, -0.5f, 0.5f); + fill(AccessorType(bias), 2, -0.5f, 0.5f); + + // Compute function + ITensorPack run_pack = {{ACL_SRC_0, &src}, {ACL_SRC_1, &weights}, {ACL_SRC_2, &bias}, {ACL_DST, &dst}}; + ITensorPack prep_pack = {{ACL_SRC_1, &weights}, {ACL_SRC_2, &bias}}; + + auto const aux_mem_req = conv->workspace(); + auto mg = MemoryGroup{}; + auto ws = manage_workspace(aux_mem_req, mg, run_pack, prep_pack); + + conv->prepare(prep_pack); + conv->run(run_pack); + + src.allocator()->free(); + weights.allocator()->free(); + bias.allocator()->free(); + + return dst; + } + + SimpleTensor compute_reference(const TensorShape &input_shape, + const TensorShape &weights_shape, + const TensorShape &bias_shape, + const PadStrideInfo &info) + { + // Create reference + SimpleTensor src_t{input_shape, _data_type, 1}; + SimpleTensor weights_t{weights_shape, _data_type, 1}; + SimpleTensor bias_t{bias_shape, _data_type, 
1}; + + // Fill reference + fill(src_t, 0, -0.5f, 0.5f); + SimpleTensor src_t1(copy_tensor(src_t)); + + fill(weights_t, 1, -0.5f, 0.5f); + SimpleTensor weights_t1(copy_tensor(weights_t)); + fill(bias_t, 2, -0.5f, 0.5f); + SimpleTensor bias_t1(copy_tensor(bias_t)); + + // Set output tile + Size2D output_tile(4U, 4U); + if (weights_shape[0] == 7 && weights_shape[1] == 1) + { + output_tile.width = 2; + output_tile.height = 1; + } + else if (weights_shape[0] == 1 && weights_shape[1] == 7) + { + output_tile.width = 1; + output_tile.height = 2; + } + else if (weights_shape[0] == 1) + { + output_tile.width = 1; + } + else if (weights_shape[1] == 1) + { + output_tile.height = 1; + } + + WinogradInfo winograd_info(output_tile, Size2D(weights_shape[0], weights_shape[1]), + Size2D(input_shape[0], input_shape[1]), info, src_t1.data_layout()); + + // Compute tensor shapes for input, filter and output transforms + TensorShape input_transform_shape = + compute_winograd_input_transform_shape(TensorInfo(input_shape, 1, _data_type), winograd_info); + TensorShape filter_transform_shape = + compute_winograd_filter_transform_shape(TensorInfo(weights_shape, 1, _data_type), winograd_info); + TensorShape batched_gemm_shape = input_transform_shape; + batched_gemm_shape[0] = filter_transform_shape[0]; + TensorShape output_transform_shape = + compute_winograd_output_transform_shape(TensorInfo(batched_gemm_shape, 1, _data_type), winograd_info); + + // Dummy matrix C to perform matrix multiplication + SimpleTensor dummy_c{batched_gemm_shape, _data_type, 1}; + + // Compute Winograd-based convolution + SimpleTensor input_transform_out = + reference::winograd_input_transform(src_t1, input_transform_shape, winograd_info); + + SimpleTensor filter_transform_out = + reference::winograd_filter_transform(weights_t1, filter_transform_shape, winograd_info); + SimpleTensor batched_gemm = + reference::gemm(input_transform_out, filter_transform_out, dummy_c, 1.0f, 0.0f); + SimpleTensor conv_out = + 
reference::winograd_output_transform(batched_gemm, bias_t1, output_transform_shape, winograd_info); + SimpleTensor conv_out_t(copy_tensor(conv_out)); + return (_act_info.enabled()) ? reference::activation_layer(conv_out_t, _act_info) : conv_out_t; + } + + TensorType _target{}; + SimpleTensor _reference{}; + ActivationLayerInfo _act_info{}; + DataType _data_type{DataType::F32}; + DataLayout _data_layout{DataLayout::NHWC}; +}; + +} // namespace validation +} // namespace test +} // namespace arm_compute + +#endif // ACL_TESTS_VALIDATION_FIXTURES_CPUWINOGRADCONV2DFIXTURE_H diff --git a/tests/validation/fixtures/CropResizeFixture.h b/tests/validation/fixtures/CropResizeFixture.h index 30a3fd8569..51db5e0947 100644 --- a/tests/validation/fixtures/CropResizeFixture.h +++ b/tests/validation/fixtures/CropResizeFixture.h @@ -1,5 +1,5 @@ /* - * Copyright (c) 2019-2021, 2023 Arm Limited. + * Copyright (c) 2019-2021, 2023-2024 Arm Limited. * * SPDX-License-Identifier: MIT * @@ -21,8 +21,8 @@ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE * SOFTWARE. 
*/ -#ifndef ARM_COMPUTE_TEST_SLICE_OPERATIONS_FIXTURE -#define ARM_COMPUTE_TEST_SLICE_OPERATIONS_FIXTURE +#ifndef ACL_TESTS_VALIDATION_FIXTURES_CROPRESIZEFIXTURE_H +#define ACL_TESTS_VALIDATION_FIXTURES_CROPRESIZEFIXTURE_H #include "arm_compute/core/TensorShape.h" #include "arm_compute/core/Types.h" @@ -49,6 +49,12 @@ class CropResizeFixture : public framework::Fixture void setup(TensorShape src_shape, TensorShape boxes_shape, Coordinates2D crop_size, InterpolationPolicy method, float extrapolation_value, bool is_outside_bounds, DataType data_type) { + if(std::is_same::value && // Cpu + data_type == DataType::F16 && !CPUInfo::get().has_fp16()) + { + return; + } + _target = compute_target(src_shape, boxes_shape, crop_size, method, extrapolation_value, is_outside_bounds, data_type); _reference = compute_reference(src_shape, boxes_shape, crop_size, method, extrapolation_value, is_outside_bounds, data_type); } @@ -131,4 +137,4 @@ class CropResizeFixture : public framework::Fixture } // namespace validation } // namespace test } // namespace arm_compute -#endif /* ARM_COMPUTE_TEST_SLICE_OPERATIONS_FIXTURE */ +#endif // ACL_TESTS_VALIDATION_FIXTURES_CROPRESIZEFIXTURE_H diff --git a/tests/validation/fixtures/DeconvolutionLayerFixture.h b/tests/validation/fixtures/DeconvolutionLayerFixture.h index 83170c413c..30443cc742 100644 --- a/tests/validation/fixtures/DeconvolutionLayerFixture.h +++ b/tests/validation/fixtures/DeconvolutionLayerFixture.h @@ -1,5 +1,5 @@ /* - * Copyright (c) 2017-2023 Arm Limited. + * Copyright (c) 2017-2024 Arm Limited. * * SPDX-License-Identifier: MIT * @@ -21,6 +21,10 @@ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE * SOFTWARE. 
*/ + +#ifndef ACL_TESTS_VALIDATION_FIXTURES_DECONVOLUTIONLAYERFIXTURE_H +#define ACL_TESTS_VALIDATION_FIXTURES_DECONVOLUTIONLAYERFIXTURE_H + #include "arm_compute/core/TensorShape.h" #include "arm_compute/core/Types.h" #include "arm_compute/core/utils/misc/ShapeCalculator.h" @@ -53,6 +57,12 @@ class DeconvolutionLayerFixtureBase : public framework::Fixture DataType data_type, DataType weights_data_type, DataLayout data_layout, QuantizationInfo input_quantization_info, QuantizationInfo output_quantization_info, QuantizationInfo weights_quantization_info, bool add_bias) { + if(std::is_same::value && // Cpu + (data_type == DataType::F16 || weights_data_type == DataType::F16) && !CPUInfo::get().has_fp16()) + { + return; + } + _data_type = data_type; _weights_data_type = weights_data_type; _bias_data_type = is_data_type_quantized_asymmetric(data_type) ? DataType::S32 : data_type; @@ -248,6 +258,12 @@ class DeconvolutionValidationFixture : public DeconvolutionLayerFixtureBase::value && // Cpu + data_type == DataType::F16 && !CPUInfo::get().has_fp16()) + { + return; + } + const TensorShape weights_shape(kernel_size_x, kernel_size_y, input_shape.z(), num_kernels); const TensorShape bias_shape(num_kernels); const PadStrideInfo info(sx, sy, padx, pady, DimensionRoundingType::CEIL); @@ -267,6 +283,12 @@ class DeconvolutionValidationAsymmFixture : public DeconvolutionLayerFixtureBase void setup(TensorShape input_shape, unsigned int sx, unsigned int sy, unsigned int pad_left, unsigned int pad_right, unsigned int pad_top, unsigned int pad_bottom, unsigned int num_kernels, DataType data_type, DataLayout data_layout, bool add_bias) { + if(std::is_same::value && // Cpu + data_type == DataType::F16 && !CPUInfo::get().has_fp16()) + { + return; + } + const TensorShape weights_shape(kernel_size_x, kernel_size_y, input_shape.z(), num_kernels); const TensorShape bias_shape(num_kernels); const PadStrideInfo info(sx, sy, pad_left, pad_right, pad_top, pad_bottom, 
DimensionRoundingType::CEIL); @@ -286,6 +308,12 @@ class DeconvolutionValidationQuantizedFixture : public DeconvolutionLayerFixture void setup(TensorShape input_shape, unsigned int sx, unsigned int sy, unsigned int padx, unsigned int pady, unsigned int num_kernels, DataType data_type, DataLayout data_layout, QuantizationInfo input_quantization_info, QuantizationInfo output_quantization_info, bool add_bias) { + if(std::is_same::value && // Cpu + data_type == DataType::F16 && !CPUInfo::get().has_fp16()) + { + return; + } + const TensorShape weights_shape(kernel_size_x, kernel_size_y, input_shape.z(), num_kernels); const TensorShape bias_shape(num_kernels); const PadStrideInfo info(sx, sy, padx, pady, DimensionRoundingType::CEIL); @@ -307,6 +335,12 @@ class DeconvolutionValidationQuantizedPerChannelFixture : public DeconvolutionLa unsigned int num_kernels, DataType data_type, DataLayout data_layout, QuantizationInfo input_quantization_info, QuantizationInfo output_quantization_info, bool add_bias, DataType weights_data_type) { + if(std::is_same::value && // Cpu + (data_type == DataType::F16 || weights_data_type == DataType::F16) && !CPUInfo::get().has_fp16()) + { + return; + } + const TensorShape weights_shape(kernel_size_x, kernel_size_y, input_shape.z(), num_kernels); const TensorShape bias_shape(num_kernels); const PadStrideInfo info(sx, sy, padx, pady, DimensionRoundingType::CEIL); @@ -331,3 +365,5 @@ class DeconvolutionValidationQuantizedPerChannelFixture : public DeconvolutionLa } // namespace validation } // namespace test } // namespace arm_compute + +#endif // ACL_TESTS_VALIDATION_FIXTURES_DECONVOLUTIONLAYERFIXTURE_H diff --git a/tests/validation/fixtures/DepthConvertLayerFixture.h b/tests/validation/fixtures/DepthConvertLayerFixture.h index f55d20bf3e..7a60ca8bf5 100644 --- a/tests/validation/fixtures/DepthConvertLayerFixture.h +++ b/tests/validation/fixtures/DepthConvertLayerFixture.h @@ -1,5 +1,5 @@ /* - * Copyright (c) 2017-2023 Arm Limited. 
+ * Copyright (c) 2017-2024 Arm Limited. * * SPDX-License-Identifier: MIT * @@ -21,8 +21,8 @@ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE * SOFTWARE. */ -#ifndef ARM_COMPUTE_TEST_DEPTH_CONVERT_FIXTURE -#define ARM_COMPUTE_TEST_DEPTH_CONVERT_FIXTURE +#ifndef ACL_TESTS_VALIDATION_FIXTURES_DEPTHCONVERTLAYERFIXTURE_H +#define ACL_TESTS_VALIDATION_FIXTURES_DEPTHCONVERTLAYERFIXTURE_H #include "arm_compute/core/TensorShape.h" #include "arm_compute/core/Types.h" @@ -47,6 +47,12 @@ class DepthConvertLayerValidationBaseFixture : public framework::Fixture public: void setup(TensorShape shape, DataType dt_in, DataType dt_out, ConvertPolicy policy, uint32_t shift, QuantizationInfo quantization_info) { + if(std::is_same::value && // Cpu + (dt_in == DataType::F16 || dt_out == DataType::F16) && !CPUInfo::get().has_fp16()) + { + return; + } + _shift = shift; _quantization_info = quantization_info; _target = compute_target(shape, dt_in, dt_out, policy, shift); @@ -149,4 +155,4 @@ class DepthConvertLayerValidationQuantizedFixture : public DepthConvertLayerVali } // namespace validation } // namespace test } // namespace arm_compute -#endif /* ARM_COMPUTE_TEST_DEPTH_CONVERT_FIXTURE */ +#endif // ACL_TESTS_VALIDATION_FIXTURES_DEPTHCONVERTLAYERFIXTURE_H diff --git a/tests/validation/fixtures/DepthwiseConvolutionLayerFixture.h b/tests/validation/fixtures/DepthwiseConvolutionLayerFixture.h index 6e2e3a3846..055e74de89 100644 --- a/tests/validation/fixtures/DepthwiseConvolutionLayerFixture.h +++ b/tests/validation/fixtures/DepthwiseConvolutionLayerFixture.h @@ -91,6 +91,15 @@ class DepthwiseConvolutionLayerValidationGenericFixture : public framework::Fixt DataLayout data_layout, ActivationLayerInfo act_info, bool mixed_layout = false, bool in_place = false, bool run_twice = false) { ARM_COMPUTE_ERROR_ON(mixed_layout && in_place); + + _skip_test = false; + if(std::is_same::value && // Cpu + (input_data_type == DataType::F16 || weights_data_type == 
DataType::F16) && !CPUInfo::get().has_fp16()) + { + _skip_test = true; + return; + } + // This hash is used by random generators. There may be hash collisions but // this is intentional as it's a very easy way to make the the current // random generation process almost different for many test configurations, @@ -374,6 +383,7 @@ class DepthwiseConvolutionLayerValidationGenericFixture : public framework::Fixt bool _in_place{ false }; bool _run_twice{ false }; bool _use_dynamic_output_quant{false}; + bool _skip_test{false}; int32_t _hash{0}; // Random initialization limits diff --git a/tests/validation/fixtures/DequantizationLayerFixture.h b/tests/validation/fixtures/DequantizationLayerFixture.h index 4eb25a5bc5..165cd423df 100644 --- a/tests/validation/fixtures/DequantizationLayerFixture.h +++ b/tests/validation/fixtures/DequantizationLayerFixture.h @@ -1,5 +1,5 @@ /* - * Copyright (c) 2017-2021, 2023 Arm Limited. + * Copyright (c) 2017-2021, 2023-2024 Arm Limited. * * SPDX-License-Identifier: MIT * @@ -21,8 +21,8 @@ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE * SOFTWARE. 
*/ -#ifndef ARM_COMPUTE_TEST_DEQUANTIZATION_LAYER_FIXTURE -#define ARM_COMPUTE_TEST_DEQUANTIZATION_LAYER_FIXTURE +#ifndef ACL_TESTS_VALIDATION_FIXTURES_DEQUANTIZATIONLAYERFIXTURE_H +#define ACL_TESTS_VALIDATION_FIXTURES_DEQUANTIZATIONLAYERFIXTURE_H #include "arm_compute/core/TensorShape.h" #include "arm_compute/core/Types.h" @@ -49,6 +49,12 @@ class DequantizationValidationFixture : public framework::Fixture public: void setup(TensorShape shape, DataType src_data_type, DataType dst_datatype, DataLayout data_layout) { + if(std::is_same::value && // Cpu + (src_data_type == DataType::F16 || dst_datatype == DataType::F16) && !CPUInfo::get().has_fp16()) + { + return; + } + _quantization_info = generate_quantization_info(src_data_type, shape.z()); _target = compute_target(shape, src_data_type, dst_datatype, data_layout); _reference = compute_reference(shape, src_data_type); @@ -164,4 +170,4 @@ class DequantizationValidationFixture : public framework::Fixture } // namespace validation } // namespace test } // namespace arm_compute -#endif /* ARM_COMPUTE_TEST_DEQUANTIZATION_LAYER_FIXTURE */ +#endif // ACL_TESTS_VALIDATION_FIXTURES_DEQUANTIZATIONLAYERFIXTURE_H diff --git a/tests/validation/fixtures/DirectConvolution3DFixture.h b/tests/validation/fixtures/DirectConvolution3DFixture.h index e80ad2f54f..fb8db15a47 100644 --- a/tests/validation/fixtures/DirectConvolution3DFixture.h +++ b/tests/validation/fixtures/DirectConvolution3DFixture.h @@ -1,5 +1,5 @@ /* - * Copyright (c) 2021, 2023 Arm Limited. + * Copyright (c) 2021, 2023-2024 Arm Limited. 
* * SPDX-License-Identifier: MIT * @@ -46,6 +46,7 @@ class DirectConvolution3DValidationGenericFixture : public framework::Fixture { public: using TBias = typename std::conditional < std::is_same::value || std::is_same::value, int32_t, T >::type; + using TAcc = typename std::conditional < std::is_integral::value, int32_t, float >::type; void setup(const TensorShape &input_shape, int stride_x, int stride_y, int stride_z, int pad_x, int pad_y, int pad_z, unsigned int kernel_width, int kernel_height, int kernel_depth, unsigned int num_kernels, bool has_bias, const ActivationLayerInfo &act_info, const DataType &data_type, const DataLayout &data_layout, @@ -53,6 +54,12 @@ class DirectConvolution3DValidationGenericFixture : public framework::Fixture { ARM_COMPUTE_ERROR_ON(data_layout != DataLayout::NDHWC); + if(std::is_same::value && // Cpu + data_type == DataType::F16 && !CPUInfo::get().has_fp16()) + { + return; + } + const TensorShape weights_shape(num_kernels, input_shape[0], kernel_width, kernel_height, kernel_depth); const TensorShape bias_shape(num_kernels); const DataType bias_data_type = is_data_type_quantized(data_type) ? DataType::S32 : data_type; @@ -150,7 +157,7 @@ class DirectConvolution3DValidationGenericFixture : public framework::Fixture fill(bias, 2); } - return reference::activation_layer(reference::conv3d(src, weights, bias, dst, conv3d_info), conv3d_info.act_info); + return reference::activation_layer(reference::conv3d(src, weights, bias, dst, conv3d_info), conv3d_info.act_info); } TensorType _target{}; diff --git a/tests/validation/fixtures/DirectConvolutionLayerFixture.h b/tests/validation/fixtures/DirectConvolutionLayerFixture.h index 6f204642ca..debfce9142 100644 --- a/tests/validation/fixtures/DirectConvolutionLayerFixture.h +++ b/tests/validation/fixtures/DirectConvolutionLayerFixture.h @@ -1,5 +1,5 @@ /* - * Copyright (c) 2017-2023 Arm Limited. + * Copyright (c) 2017-2024 Arm Limited. 
* * SPDX-License-Identifier: MIT * @@ -93,6 +93,12 @@ class DirectConvolutionValidationGenericFixture : public framework::Fixture void setup(TensorShape input_shape, int stride_x, int stride_y, int pad_x, int pad_y, unsigned int kernel_size, unsigned int num_kernels, DataType data_type, QuantizationInfo quantization_info, ActivationLayerInfo act_info, DataLayout data_layout, bool mixed_layout = false) { + if(std::is_same::value && // Cpu + data_type == DataType::F16 && !CPUInfo::get().has_fp16()) + { + return; + } + // This hash is used by random generators. There may be hash collisions but // this is intentional as it's a very easy way to make the the current // random generation process almost different for many test configurations, @@ -133,6 +139,12 @@ class DirectConvolutionValidationGenericFixture : public framework::Fixture ARM_COMPUTE_ERROR_ON(data_layout == DataLayout::UNKNOWN); ARM_COMPUTE_UNUSED(dilation); + if(std::is_same::value && // Cpu + data_type == DataType::F16 && !CPUInfo::get().has_fp16()) + { + return; + } + // This hash is used by random generators. There may be hash collisions but // this is intentional as it's a very easy way to make the the current // random generation process almost different for many test configurations, diff --git a/tests/validation/fixtures/ElementwiseOperationsFixture.h b/tests/validation/fixtures/ElementwiseOperationsFixture.h index f36a1f75b7..0c809b001b 100644 --- a/tests/validation/fixtures/ElementwiseOperationsFixture.h +++ b/tests/validation/fixtures/ElementwiseOperationsFixture.h @@ -1,5 +1,5 @@ /* - * Copyright (c) 2018-2021, 2023 Arm Limited. + * Copyright (c) 2018-2021, 2023-2024 Arm Limited. 
* * SPDX-License-Identifier: MIT * @@ -50,6 +50,13 @@ class ArithmeticOperationsGenericFixture : public framework::Fixture DataType data_type0, DataType data_type1, DataType output_data_type, QuantizationInfo qinfo0, QuantizationInfo qinfo1, QuantizationInfo qinfo_out, bool is_inplace = false, bool use_dynamic_shape = false) { + if(std::is_same::value && // Cpu + (data_type0 == DataType::F16 || data_type1 == DataType::F16 || output_data_type == DataType::F16) && + !CPUInfo::get().has_fp16()) + { + return; + } + _op = op; _use_dynamic_shape = use_dynamic_shape; _is_inplace = is_inplace; diff --git a/tests/validation/fixtures/ElementwiseUnaryFixture.h b/tests/validation/fixtures/ElementwiseUnaryFixture.h index 15344288db..70f6ea9172 100644 --- a/tests/validation/fixtures/ElementwiseUnaryFixture.h +++ b/tests/validation/fixtures/ElementwiseUnaryFixture.h @@ -1,5 +1,5 @@ /* - * Copyright (c) 2018-2021, 2023 Arm Limited. + * Copyright (c) 2018-2021, 2023-2024 Arm Limited. * * SPDX-License-Identifier: MIT * @@ -21,8 +21,8 @@ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE * SOFTWARE. 
*/ -#ifndef ARM_COMPUTE_TEST_ELEMENTWISE_UNARY_FIXTURE -#define ARM_COMPUTE_TEST_ELEMENTWISE_UNARY_FIXTURE +#ifndef ACL_TESTS_VALIDATION_FIXTURES_ELEMENTWISEUNARYFIXTURE_H +#define ACL_TESTS_VALIDATION_FIXTURES_ELEMENTWISEUNARYFIXTURE_H #include "arm_compute/core/QuantizationInfo.h" #include "arm_compute/core/TensorShape.h" @@ -53,6 +53,12 @@ class ElementWiseUnaryValidationFixture : public framework::Fixture void setup(TensorShape input_shape, DataType input_data_type, bool in_place, ElementWiseUnary op, bool use_dynamic_shape = false, QuantizationInfo qinfo = QuantizationInfo(), QuantizationInfo qinfo_out = QuantizationInfo()) { + if(std::is_same::value && // Cpu + input_data_type == DataType::F16 && !CPUInfo::get().has_fp16()) + { + return; + } + _op = op; _target = compute_target(input_shape, input_data_type, in_place, qinfo, qinfo_out); _reference = compute_reference(input_shape, input_data_type, qinfo, qinfo_out); @@ -444,4 +450,4 @@ class RoundQuantizedValidationFixture : public ElementWiseUnaryValidationFixture } // namespace validation } // namespace test } // namespace arm_compute -#endif /* ARM_COMPUTE_TEST_ELEMENTWISE_UNARY_FIXTURE */ +#endif // ACL_TESTS_VALIDATION_FIXTURES_ELEMENTWISEUNARYFIXTURE_H diff --git a/tests/validation/fixtures/FlattenLayerFixture.h b/tests/validation/fixtures/FlattenLayerFixture.h index e72487c7cf..ee48e1d6cf 100644 --- a/tests/validation/fixtures/FlattenLayerFixture.h +++ b/tests/validation/fixtures/FlattenLayerFixture.h @@ -1,5 +1,5 @@ /* - * Copyright (c) 2017-2021, 2023 Arm Limited. + * Copyright (c) 2017-2021, 2023-2024 Arm Limited. * * SPDX-License-Identifier: MIT * @@ -21,8 +21,8 @@ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE * SOFTWARE. 
*/ -#ifndef ARM_COMPUTE_TEST_FLATTEN_LAYER_FIXTURE -#define ARM_COMPUTE_TEST_FLATTEN_LAYER_FIXTURE +#ifndef ACL_TESTS_VALIDATION_FIXTURES_FLATTENLAYERFIXTURE_H +#define ACL_TESTS_VALIDATION_FIXTURES_FLATTENLAYERFIXTURE_H #include "arm_compute/core/TensorShape.h" #include "arm_compute/core/Types.h" @@ -52,6 +52,12 @@ class FlattenLayerValidationFixture : public framework::Fixture public: void setup(TensorShape shape, DataType data_type) { + if(std::is_same::value && // Cpu + data_type == DataType::F16 && !CPUInfo::get().has_fp16()) + { + return; + } + TensorShape shape_flatten; TensorInfo input_info(shape, 1, data_type); shape_flatten = compute_flatten_shape(&input_info); @@ -118,4 +124,4 @@ class FlattenLayerValidationFixture : public framework::Fixture } // namespace validation } // namespace test } // namespace arm_compute -#endif /* ARM_COMPUTE_TEST_FLATTEN_LAYER_FIXTURE */ +#endif // ACL_TESTS_VALIDATION_FIXTURES_FLATTENLAYERFIXTURE_H diff --git a/tests/validation/fixtures/FloorFixture.h b/tests/validation/fixtures/FloorFixture.h index 7d38666f47..5cbf2b8e9c 100644 --- a/tests/validation/fixtures/FloorFixture.h +++ b/tests/validation/fixtures/FloorFixture.h @@ -1,5 +1,5 @@ /* - * Copyright (c) 2017-2021, 2023 Arm Limited. + * Copyright (c) 2017-2021, 2023-2024 Arm Limited. * * SPDX-License-Identifier: MIT * @@ -21,8 +21,8 @@ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE * SOFTWARE. 
*/ -#ifndef ARM_COMPUTE_TEST_FLOOR_FIXTURE -#define ARM_COMPUTE_TEST_FLOOR_FIXTURE +#ifndef ACL_TESTS_VALIDATION_FIXTURES_FLOORFIXTURE_H +#define ACL_TESTS_VALIDATION_FIXTURES_FLOORFIXTURE_H #include "arm_compute/core/TensorShape.h" #include "arm_compute/core/Types.h" @@ -46,6 +46,12 @@ class FloorValidationFixture : public framework::Fixture public: void setup(TensorShape shape, DataType data_type) { + if(std::is_same::value && // Cpu + data_type == DataType::F16 && !CPUInfo::get().has_fp16()) + { + return; + } + _target = compute_target(shape, data_type); _reference = compute_reference(shape, data_type); } @@ -103,4 +109,4 @@ class FloorValidationFixture : public framework::Fixture } // namespace validation } // namespace test } // namespace arm_compute -#endif /* ARM_COMPUTE_TEST_FLOOR_FIXTURE */ +#endif // ACL_TESTS_VALIDATION_FIXTURES_FLOORFIXTURE_H diff --git a/tests/validation/fixtures/FullyConnectedLayerFixture.h b/tests/validation/fixtures/FullyConnectedLayerFixture.h index 344187868f..481a3b7659 100644 --- a/tests/validation/fixtures/FullyConnectedLayerFixture.h +++ b/tests/validation/fixtures/FullyConnectedLayerFixture.h @@ -92,6 +92,12 @@ class FullyConnectedLayerValidationGenericFixture : public framework::Fixture void setup(TensorShape input_shape, TensorShape weights_shape, TensorShape bias_shape, TensorShape output_shape, bool transpose_weights, bool reshape_weights, DataType data_type, QuantizationInfo quantization_info, ActivationLayerInfo activation_info, bool mixed_layout = false) { + if(std::is_same::value && // Cpu + data_type==DataType::F16 && !CPUInfo::get().has_fp16()) + { + return; + } + ARM_COMPUTE_UNUSED(weights_shape); ARM_COMPUTE_UNUSED(bias_shape); @@ -459,6 +465,12 @@ class FullyConnectedWithDynamicTensorsFixture : public framework::Fixture void setup(TensorShape src_shape, TensorShape weights_shape, TensorShape bias_shape, TensorShape dst_shape, DataType data_type, ActivationLayerInfo activation_info, bool constant_weights, bool 
constant_bias, bool weights_reshaped, bool remove_bias = false) { + if(std::is_same::value && // Cpu + data_type==DataType::F16 && !CPUInfo::get().has_fp16()) + { + return; + } + _data_type = data_type; const bool is_quantized = is_data_type_quantized(data_type); diff --git a/tests/validation/fixtures/FuseBatchNormalizationFixture.h b/tests/validation/fixtures/FuseBatchNormalizationFixture.h index a05e4169a7..61affff6ba 100644 --- a/tests/validation/fixtures/FuseBatchNormalizationFixture.h +++ b/tests/validation/fixtures/FuseBatchNormalizationFixture.h @@ -1,5 +1,5 @@ /* - * Copyright (c) 2019-2021, 2023 Arm Limited. + * Copyright (c) 2019-2021, 2023-2024 Arm Limited. * * SPDX-License-Identifier: MIT * @@ -21,8 +21,8 @@ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE * SOFTWARE. */ -#ifndef ARM_COMPUTE_TEST_FUSEBATCHNORMALIZATION_FIXTURE -#define ARM_COMPUTE_TEST_FUSEBATCHNORMALIZATION_FIXTURE +#ifndef ACL_TESTS_VALIDATION_FIXTURES_FUSEBATCHNORMALIZATIONFIXTURE_H +#define ACL_TESTS_VALIDATION_FIXTURES_FUSEBATCHNORMALIZATIONFIXTURE_H #include "arm_compute/core/TensorShape.h" #include "arm_compute/core/Types.h" @@ -49,6 +49,12 @@ class FuseBatchNormalizationFixture : public framework::Fixture public: void setup(TensorShape shape_w, DataType data_type, DataLayout data_layout, bool in_place, bool with_bias, bool with_gamma, bool with_beta) { + if(std::is_same::value && // Cpu + data_type == DataType::F16 && !CPUInfo::get().has_fp16()) + { + return; + } + std::tie(_target_w, _target_b) = compute_target(shape_w, data_type, data_layout, in_place, with_bias, with_gamma, with_beta); std::tie(_reference_w, _reference_b) = compute_reference(shape_w, data_type, with_bias, with_gamma, with_beta); } @@ -202,4 +208,4 @@ class FuseBatchNormalizationFixture : public framework::Fixture } // namespace validation } // namespace test } // namespace arm_compute -#endif /* ARM_COMPUTE_TEST_FUSEBATCHNORMALIZATION_FIXTURE */ +#endif // 
ACL_TESTS_VALIDATION_FIXTURES_FUSEBATCHNORMALIZATIONFIXTURE_H diff --git a/tests/validation/fixtures/GEMMFixture.h b/tests/validation/fixtures/GEMMFixture.h index 94bedc83e1..34c0574412 100644 --- a/tests/validation/fixtures/GEMMFixture.h +++ b/tests/validation/fixtures/GEMMFixture.h @@ -51,6 +51,12 @@ class GEMMGenericValidationFixture : public framework::Fixture public: void setup(TensorShape shape_a, TensorShape shape_b, TensorShape shape_c, TensorShape output_shape, float alpha, float beta, bool pretranspose, DataType data_type, bool accumulate=false) { + if(std::is_same::value && // Cpu + data_type == DataType::F16 && !CPUInfo::get().has_fp16()) + { + return; + } + ARM_COMPUTE_UNUSED(pretranspose); _target = compute_target(shape_a, shape_b, shape_c, output_shape, alpha, beta, data_type, accumulate); _reference = compute_reference(shape_a, shape_b, output_shape, alpha, beta, data_type, accumulate); diff --git a/tests/validation/fixtures/Im2ColFixture.h b/tests/validation/fixtures/Im2ColFixture.h index 5c7978f4ab..8fb53c35b4 100644 --- a/tests/validation/fixtures/Im2ColFixture.h +++ b/tests/validation/fixtures/Im2ColFixture.h @@ -1,5 +1,5 @@ /* - * Copyright (c) 2017-2021, 2023 Arm Limited. + * Copyright (c) 2017-2021, 2023-2024 Arm Limited. * * SPDX-License-Identifier: MIT * @@ -21,8 +21,8 @@ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE * SOFTWARE. 
*/ -#ifndef ARM_COMPUTE_TEST_IM2COL_FIXTURE -#define ARM_COMPUTE_TEST_IM2COL_FIXTURE +#ifndef ACL_TESTS_VALIDATION_FIXTURES_IM2COLFIXTURE_H +#define ACL_TESTS_VALIDATION_FIXTURES_IM2COLFIXTURE_H #include "arm_compute/core/Helpers.h" #include "arm_compute/core/TensorShape.h" @@ -51,6 +51,12 @@ class Im2ColOpValidationFixture : public framework::Fixture void setup(TensorShape input_shape, DataType data_type, const Size2D &kernel_dims, const PadStrideInfo &conv_info, const QuantizationInfo &quant_info, const DataLayout &data_layout, unsigned int num_groups) { + if(std::is_same::value && // Cpu + data_type == DataType::F16 && !CPUInfo::get().has_fp16()) + { + return; + } + _kernel_dims = kernel_dims; _conv_info = conv_info; _quant_info = quant_info; @@ -136,4 +142,4 @@ class Im2ColOpValidationFixture : public framework::Fixture } // namespace validation } // namespace test } // namespace arm_compute -#endif /* ARM_COMPUTE_TEST_IM2COL_FIXTURE */ +#endif // ACL_TESTS_VALIDATION_FIXTURES_IM2COLFIXTURE_H diff --git a/tests/validation/fixtures/InstanceNormalizationLayerFixture.h b/tests/validation/fixtures/InstanceNormalizationLayerFixture.h index c26dd99f02..b78b742e09 100644 --- a/tests/validation/fixtures/InstanceNormalizationLayerFixture.h +++ b/tests/validation/fixtures/InstanceNormalizationLayerFixture.h @@ -1,5 +1,5 @@ /* - * Copyright (c) 2019-2021, 2023 Arm Limited. + * Copyright (c) 2019-2021, 2023-2024 Arm Limited. * * SPDX-License-Identifier: MIT * @@ -21,8 +21,8 @@ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE * SOFTWARE. 
*/ -#ifndef ARM_COMPUTE_TEST_INSTANCENORMALIZATION_FIXTURE -#define ARM_COMPUTE_TEST_INSTANCENORMALIZATION_FIXTURE +#ifndef ACL_TESTS_VALIDATION_FIXTURES_INSTANCENORMALIZATIONLAYERFIXTURE_H +#define ACL_TESTS_VALIDATION_FIXTURES_INSTANCENORMALIZATIONLAYERFIXTURE_H #include "arm_compute/core/TensorShape.h" #include "arm_compute/core/Types.h" @@ -46,6 +46,12 @@ class InstanceNormalizationLayerValidationFixture : public framework::Fixture public: void setup(TensorShape shape, DataType data_type, DataLayout data_layout, bool in_place) { + if(std::is_same::value && // Cpu + data_type == DataType::F16 && !CPUInfo::get().has_fp16()) + { + return; + } + _target = compute_target(shape, data_type, data_layout, in_place); _reference = compute_reference(shape, data_type); } @@ -146,4 +152,4 @@ class InstanceNormalizationLayerValidationFixture : public framework::Fixture } // namespace validation } // namespace test } // namespace arm_compute -#endif /* ARM_COMPUTE_TEST_INSTANCENORMALIZATION_FIXTURE */ +#endif // ACL_TESTS_VALIDATION_FIXTURES_INSTANCENORMALIZATIONLAYERFIXTURE_H diff --git a/tests/validation/fixtures/L2NormalizeLayerFixture.h b/tests/validation/fixtures/L2NormalizeLayerFixture.h index b8f4b1eaf3..9e65f1eaa5 100644 --- a/tests/validation/fixtures/L2NormalizeLayerFixture.h +++ b/tests/validation/fixtures/L2NormalizeLayerFixture.h @@ -1,5 +1,5 @@ /* - * Copyright (c) 2017-2021, 2023 Arm Limited. + * Copyright (c) 2017-2021, 2023-2024 Arm Limited. * * SPDX-License-Identifier: MIT * @@ -21,8 +21,8 @@ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE * SOFTWARE. 
*/ -#ifndef ARM_COMPUTE_TEST_L2NORMALIZE_FIXTURE -#define ARM_COMPUTE_TEST_L2NORMALIZE_FIXTURE +#ifndef ACL_TESTS_VALIDATION_FIXTURES_L2NORMALIZELAYERFIXTURE_H +#define ACL_TESTS_VALIDATION_FIXTURES_L2NORMALIZELAYERFIXTURE_H #include "arm_compute/core/TensorShape.h" #include "arm_compute/core/Types.h" @@ -50,6 +50,12 @@ class L2NormalizeLayerValidationFixture : public framework::Fixture public: void setup(TensorShape shape, DataType data_type, DataLayout data_layout, int axis, float epsilon) { + if(std::is_same::value && // Cpu + data_type == DataType::F16 && !CPUInfo::get().has_fp16()) + { + return; + } + _target = compute_target(shape, data_type, data_layout, axis, epsilon); _reference = compute_reference(shape, data_type, data_layout, axis, epsilon); } @@ -134,4 +140,4 @@ class L2NormalizeLayerValidationFixture : public framework::Fixture } // namespace validation } // namespace test } // namespace arm_compute -#endif /* ARM_COMPUTE_TEST_L2NORMALIZE_FIXTURE */ +#endif // ACL_TESTS_VALIDATION_FIXTURES_L2NORMALIZELAYERFIXTURE_H diff --git a/tests/validation/fixtures/LSTMLayerFixture.h b/tests/validation/fixtures/LSTMLayerFixture.h index a32e9adfe5..fa7c7d1d90 100644 --- a/tests/validation/fixtures/LSTMLayerFixture.h +++ b/tests/validation/fixtures/LSTMLayerFixture.h @@ -1,5 +1,5 @@ /* - * Copyright (c) 2018-2023 Arm Limited. + * Copyright (c) 2018-2024 Arm Limited. * * SPDX-License-Identifier: MIT * @@ -21,8 +21,14 @@ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE * SOFTWARE. 
*/ -#ifndef ARM_COMPUTE_TEST_LSTM_LAYER_FIXTURE -#define ARM_COMPUTE_TEST_LSTM_LAYER_FIXTURE +#ifndef ACL_TESTS_VALIDATION_FIXTURES_LSTMLAYERFIXTURE_H +#define ACL_TESTS_VALIDATION_FIXTURES_LSTMLAYERFIXTURE_H + +#include "arm_compute/runtime/Allocator.h" +#include "arm_compute/runtime/BlobLifetimeManager.h" +#include "arm_compute/runtime/CL/CLBufferAllocator.h" +#include "arm_compute/runtime/MemoryManagerOnDemand.h" +#include "arm_compute/runtime/PoolManager.h" #include "tests/Globals.h" #include "tests/framework/Asserts.h" @@ -48,10 +54,16 @@ class LSTMLayerValidationFixture : public framework::Fixture public: void setup(TensorShape input_shape, TensorShape input_weights_shape, TensorShape recurrent_weights_shape, TensorShape cell_bias_shape, TensorShape output_cell_shape, TensorShape output_shape, TensorShape scratch_shape, ActivationLayerInfo info, float cell_threshold, float projection_threshold, DataType data_type, bool projection_opt, bool peephole_opt, - bool use_layer_norm) + bool use_layer_norm, bool use_memory_manager) { + if(std::is_same::value && // Cpu + data_type == DataType::F16 && !CPUInfo::get().has_fp16()) + { + return; + } + _target = compute_target(input_shape, input_weights_shape, recurrent_weights_shape, cell_bias_shape, output_cell_shape, output_shape, scratch_shape, info, cell_threshold, projection_threshold, - data_type, projection_opt, peephole_opt, use_layer_norm); + data_type, projection_opt, peephole_opt, use_layer_norm, use_memory_manager); _reference = compute_reference(input_shape, input_weights_shape, recurrent_weights_shape, cell_bias_shape, output_cell_shape, output_shape, scratch_shape, info, cell_threshold, projection_threshold, data_type, projection_opt, peephole_opt, use_layer_norm); } @@ -77,7 +89,7 @@ class LSTMLayerValidationFixture : public framework::Fixture } TensorType compute_target(const TensorShape &input_shape, const TensorShape &input_weights_shape, const TensorShape &recurrent_weights_shape, const TensorShape 
&cell_bias_shape, const TensorShape &output_cell_shape, const TensorShape &output_shape, const TensorShape &scratch_shape, ActivationLayerInfo info, float cell_threshold, - float projection_threshold, DataType data_type, bool projection_opt, bool peephole_opt, bool use_layer_norm) + float projection_threshold, DataType data_type, bool projection_opt, bool peephole_opt, bool use_layer_norm, bool use_memory_manager) { const unsigned int num_cells = input_weights_shape.y(); const unsigned int num_outputs = recurrent_weights_shape.x(); @@ -159,7 +171,17 @@ class LSTMLayerValidationFixture : public framework::Fixture } // Create and configure function - FunctionType lstm; + std::shared_ptr mm = nullptr; + + if(use_memory_manager) + { + auto lifetime_mgr = std::make_shared(); + auto pool_mgr = std::make_shared(); + mm = std::make_shared(lifetime_mgr, pool_mgr); + } + + FunctionType lstm(mm); + lstm.configure(&input, &input_to_forget_w, &input_to_cell_w, &input_to_output_w, &recurrent_to_forget_w, &recurrent_to_cell_w, &recurrent_to_output_w, &forget_gate_bias, &cell_bias, &output_gate_bias, &output_state_in, &cell_state_in, @@ -314,8 +336,27 @@ class LSTMLayerValidationFixture : public framework::Fixture } // Compute function + if(use_memory_manager) + { + if(std::is_same::value) + { + Allocator alloc{}; + mm->populate(alloc, 1); + } + else + { + CLBufferAllocator alloc{}; + mm->populate(alloc, 1); + } + } + lstm.run(); + if(use_memory_manager) + { + mm->clear(); + } + _target_scratch = std::move(scratch); return output; } @@ -535,4 +576,4 @@ class LSTMLayerValidationFixture : public framework::Fixture } // namespace validation } // namespace test } // namespace arm_compute -#endif /* ARM_COMPUTE_TEST_LSTM_LAYER_FIXTURE */ +#endif // ACL_TESTS_VALIDATION_FIXTURES_LSTMLAYERFIXTURE_H diff --git a/tests/validation/fixtures/MatMulFixture.h b/tests/validation/fixtures/MatMulFixture.h index ffd12e56d0..4ccd2b8266 100644 --- a/tests/validation/fixtures/MatMulFixture.h +++ 
b/tests/validation/fixtures/MatMulFixture.h @@ -65,6 +65,12 @@ class MatMulGenericValidationFixture : public framework::Fixture QuantizationInfo b_qinfo = QuantizationInfo(), QuantizationInfo o_qinfo = QuantizationInfo()) { + if(std::is_same::value && // Cpu + data_type == DataType::F16 && !CPUInfo::get().has_fp16()) + { + return; + } + // For brevity, the input shapes are assumed to be not-transposed for both a and b matrices. if (transpose_a) { diff --git a/tests/validation/fixtures/MaxUnpoolingLayerFixture.h b/tests/validation/fixtures/MaxUnpoolingLayerFixture.h index 808e3ffabd..1fd2049272 100644 --- a/tests/validation/fixtures/MaxUnpoolingLayerFixture.h +++ b/tests/validation/fixtures/MaxUnpoolingLayerFixture.h @@ -1,5 +1,5 @@ /* - * Copyright (c) 2020-2021, 2023 Arm Limited. + * Copyright (c) 2020-2021, 2023-2024 Arm Limited. * * SPDX-License-Identifier: MIT * @@ -21,8 +21,8 @@ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE * SOFTWARE. */ -#ifndef ARM_COMPUTE_TEST_POOLING_LAYER_FIXTURE -#define ARM_COMPUTE_TEST_POOLING_LAYER_FIXTURE +#ifndef ACL_TESTS_VALIDATION_FIXTURES_MAXUNPOOLINGLAYERFIXTURE_H +#define ACL_TESTS_VALIDATION_FIXTURES_MAXUNPOOLINGLAYERFIXTURE_H #include "arm_compute/core/TensorShape.h" #include "arm_compute/core/Types.h" @@ -48,6 +48,12 @@ class MaxUnpoolingLayerValidationGenericFixture : public framework::Fixture public: void setup(TensorShape shape, PoolingLayerInfo pool_info, DataType data_type, DataLayout data_layout) { + if(std::is_same::value && // Cpu + data_type == DataType::F16 && !CPUInfo::get().has_fp16()) + { + return; + } + std::mt19937 gen(library->seed()); std::uniform_int_distribution<> offset_dis(0, 20); const float scale = data_type == DataType::QASYMM8_SIGNED ? 
1.f / 127.f : 1.f / 255.f; @@ -159,4 +165,4 @@ class MaxUnpoolingLayerValidationFixture : public MaxUnpoolingLayerValidationGen } // namespace validation } // namespace test } // namespace arm_compute -#endif /* ARM_COMPUTE_TEST_POOLING_LAYER_FIXTURE */ +#endif // ACL_TESTS_VALIDATION_FIXTURES_MAXUNPOOLINGLAYERFIXTURE_H diff --git a/tests/validation/fixtures/MeanStdDevNormalizationLayerFixture.h b/tests/validation/fixtures/MeanStdDevNormalizationLayerFixture.h index bf5d20790c..f8176e82ae 100644 --- a/tests/validation/fixtures/MeanStdDevNormalizationLayerFixture.h +++ b/tests/validation/fixtures/MeanStdDevNormalizationLayerFixture.h @@ -1,5 +1,5 @@ /* - * Copyright (c) 2019-2023 Arm Limited. + * Copyright (c) 2019-2024 Arm Limited. * * SPDX-License-Identifier: MIT * @@ -21,8 +21,8 @@ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE * SOFTWARE. */ -#ifndef ARM_COMPUTE_TEST_MEAN_STDDEV_NORMALIZATION_LAYER_FIXTURE -#define ARM_COMPUTE_TEST_MEAN_STDDEV_NORMALIZATION_LAYER_FIXTURE +#ifndef ACL_TESTS_VALIDATION_FIXTURES_MEANSTDDEVNORMALIZATIONLAYERFIXTURE_H +#define ACL_TESTS_VALIDATION_FIXTURES_MEANSTDDEVNORMALIZATIONLAYERFIXTURE_H #include "arm_compute/core/TensorShape.h" #include "arm_compute/core/Types.h" @@ -46,6 +46,12 @@ class MeanStdDevNormalizationLayerValidationFixture : public framework::Fixture public: void setup(TensorShape shape, DataType dt, bool in_place, float epsilon = 1e-8) { + if(std::is_same::value && // Cpu + dt == DataType::F16 && !CPUInfo::get().has_fp16()) + { + return; + } + QuantizationInfo qi = QuantizationInfo(0.5f, 10); _data_type = dt; _target = compute_target(shape, dt, in_place, epsilon, qi); @@ -128,4 +134,4 @@ class MeanStdDevNormalizationLayerValidationFixture : public framework::Fixture } // namespace validation } // namespace test } // namespace arm_compute -#endif /* ARM_COMPUTE_TEST_MEAN_STDDEV_NORMALIZATION_LAYER_FIXTURE */ +#endif // 
ACL_TESTS_VALIDATION_FIXTURES_MEANSTDDEVNORMALIZATIONLAYERFIXTURE_H diff --git a/tests/validation/fixtures/NormalizationLayerFixture.h b/tests/validation/fixtures/NormalizationLayerFixture.h index ddaa3533f5..06ec88b5af 100644 --- a/tests/validation/fixtures/NormalizationLayerFixture.h +++ b/tests/validation/fixtures/NormalizationLayerFixture.h @@ -1,5 +1,5 @@ /* - * Copyright (c) 2017-2021, 2023 Arm Limited. + * Copyright (c) 2017-2021, 2023-2024 Arm Limited. * * SPDX-License-Identifier: MIT * @@ -21,8 +21,8 @@ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE * SOFTWARE. */ -#ifndef ARM_COMPUTE_TEST_NORMALIZATION_LAYER_FIXTURE -#define ARM_COMPUTE_TEST_NORMALIZATION_LAYER_FIXTURE +#ifndef ACL_TESTS_VALIDATION_FIXTURES_NORMALIZATIONLAYERFIXTURE_H +#define ACL_TESTS_VALIDATION_FIXTURES_NORMALIZATIONLAYERFIXTURE_H #include "arm_compute/core/TensorShape.h" #include "arm_compute/core/Types.h" @@ -48,6 +48,12 @@ class NormalizationValidationGenericFixture : public framework::Fixture public: void setup(TensorShape shape, NormType norm_type, int norm_size, float beta, bool is_scaled, DataType data_type, DataLayout data_layout) { + if(std::is_same::value && // Cpu + data_type == DataType::F16 && !CPUInfo::get().has_fp16()) + { + return; + } + NormalizationLayerInfo info(norm_type, norm_size, 5, beta, 1.f, is_scaled); _target = compute_target(shape, info, data_type, data_layout); @@ -126,4 +132,4 @@ class NormalizationValidationFixture : public NormalizationValidationGenericFixt } // namespace validation } // namespace test } // namespace arm_compute -#endif /* ARM_COMPUTE_TEST_NORMALIZATION_LAYER_FIXTURE */ +#endif // ACL_TESTS_VALIDATION_FIXTURES_NORMALIZATIONLAYERFIXTURE_H diff --git a/tests/validation/fixtures/PadLayerFixture.h b/tests/validation/fixtures/PadLayerFixture.h index 93b43616ff..10c7cdab82 100644 --- a/tests/validation/fixtures/PadLayerFixture.h +++ b/tests/validation/fixtures/PadLayerFixture.h @@ -1,5 +1,5 @@ /* - * Copyright 
(c) 2018-2021, 2023 Arm Limited. + * Copyright (c) 2018-2021, 2023-2024 Arm Limited. * * SPDX-License-Identifier: MIT * @@ -21,8 +21,8 @@ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE * SOFTWARE. */ -#ifndef ARM_COMPUTE_TEST_PADLAYER_FIXTURE -#define ARM_COMPUTE_TEST_PADLAYER_FIXTURE +#ifndef ACL_TESTS_VALIDATION_FIXTURES_PADLAYERFIXTURE_H +#define ACL_TESTS_VALIDATION_FIXTURES_PADLAYERFIXTURE_H #include "arm_compute/core/TensorShape.h" #include "arm_compute/core/Types.h" @@ -46,6 +46,12 @@ class PaddingFixture : public framework::Fixture public: void setup(TensorShape shape, DataType data_type, const PaddingList &padding, const PaddingMode mode) { + if(std::is_same::value && // Cpu + data_type == DataType::F16 && !CPUInfo::get().has_fp16()) + { + return; + } + PaddingList clamped_padding = padding; if(mode != PaddingMode::CONSTANT) { @@ -132,4 +138,4 @@ class PaddingFixture : public framework::Fixture } // namespace validation } // namespace test } // namespace arm_compute -#endif /* ARM_COMPUTE_TEST_PADLAYER_FIXTURE */ +#endif // ACL_TESTS_VALIDATION_FIXTURES_PADLAYERFIXTURE_H diff --git a/tests/validation/fixtures/PixelWiseMultiplicationFixture.h b/tests/validation/fixtures/PixelWiseMultiplicationFixture.h index 4345d8a13f..213e7355a5 100644 --- a/tests/validation/fixtures/PixelWiseMultiplicationFixture.h +++ b/tests/validation/fixtures/PixelWiseMultiplicationFixture.h @@ -1,5 +1,5 @@ /* - * Copyright (c) 2017-2021, 2023 Arm Limited. + * Copyright (c) 2017-2021, 2023-2024 Arm Limited. * * SPDX-License-Identifier: MIT * @@ -21,8 +21,8 @@ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE * SOFTWARE. 
*/ -#ifndef ARM_COMPUTE_TEST_PIXEL_WISE_MULTIPLICATION_FIXTURE -#define ARM_COMPUTE_TEST_PIXEL_WISE_MULTIPLICATION_FIXTURE +#ifndef ACL_TESTS_VALIDATION_FIXTURES_PIXELWISEMULTIPLICATIONFIXTURE_H +#define ACL_TESTS_VALIDATION_FIXTURES_PIXELWISEMULTIPLICATIONFIXTURE_H #include "arm_compute/core/TensorShape.h" #include "arm_compute/core/Types.h" @@ -58,6 +58,13 @@ class PixelWiseMultiplicationGenericValidationFixture : public framework::Fixtur ActivationLayerInfo act_info, bool is_inplace) { + if(std::is_same::value && // Cpu + (dt_in1 == DataType::F16 || dt_in2 == DataType::F16 || dt_out == DataType::F16) && + !CPUInfo::get().has_fp16()) + { + return; + } + _is_inplace = is_inplace; _target = compute_target(shape0, shape1, dt_in1, dt_in2, dt_out, scale, convert_policy, rounding_policy, qinfo0, qinfo1, qinfo_out, act_info); _reference = compute_reference(shape0, shape1, dt_in1, dt_in2, dt_out, scale, convert_policy, rounding_policy, qinfo0, qinfo1, qinfo_out, act_info); @@ -233,4 +240,4 @@ class PixelWiseMultiplicationBroadcastValidationQuantizedFixture : public PixelW } // namespace validation } // namespace test } // namespace arm_compute -#endif /* ARM_COMPUTE_TEST_PIXEL_WISE_MULTIPLICATION_FIXTURE */ +#endif // ACL_TESTS_VALIDATION_FIXTURES_PIXELWISEMULTIPLICATIONFIXTURE_H diff --git a/tests/validation/fixtures/Pooling3dLayerFixture.h b/tests/validation/fixtures/Pooling3dLayerFixture.h index 1bdf615fb1..1d9ee58df9 100644 --- a/tests/validation/fixtures/Pooling3dLayerFixture.h +++ b/tests/validation/fixtures/Pooling3dLayerFixture.h @@ -1,5 +1,5 @@ /* - * Copyright (c) 2022-2023 Arm Limited. + * Copyright (c) 2022-2024 Arm Limited. * * SPDX-License-Identifier: MIT * @@ -21,8 +21,8 @@ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE * SOFTWARE. 
*/ -#ifndef ARM_COMPUTE_TEST_POOLING_3D_LAYER_FIXTURE -#define ARM_COMPUTE_TEST_POOLING_3D_LAYER_FIXTURE +#ifndef ACL_TESTS_VALIDATION_FIXTURES_POOLING3DLAYERFIXTURE_H +#define ACL_TESTS_VALIDATION_FIXTURES_POOLING3DLAYERFIXTURE_H #include "arm_compute/core/TensorShape.h" #include "arm_compute/core/Types.h" @@ -47,6 +47,12 @@ class Pooling3dLayerValidationGenericFixture : public framework::Fixture public: void setup(TensorShape shape, Pooling3dLayerInfo pool_info, DataType data_type, QuantizationInfo input_qinfo = QuantizationInfo(), QuantizationInfo output_qinfo = QuantizationInfo()) { + if(std::is_same::value && // Cpu + data_type == DataType::F16 && !CPUInfo::get().has_fp16()) + { + return; + } + _target = compute_target(shape, pool_info, data_type, input_qinfo, output_qinfo); _reference = compute_reference(shape, pool_info, data_type, input_qinfo, output_qinfo); } @@ -161,4 +167,4 @@ class SpecialPooling3dLayerValidationFixture : public Pooling3dLayerValidationGe } // namespace validation } // namespace test } // namespace arm_compute -#endif /* ARM_COMPUTE_TEST_POOLING_3D_LAYER_FIXTURE */ +#endif // ACL_TESTS_VALIDATION_FIXTURES_POOLING3DLAYERFIXTURE_H diff --git a/tests/validation/fixtures/PoolingLayerFixture.h b/tests/validation/fixtures/PoolingLayerFixture.h index 59c920868b..c7265a0e8a 100644 --- a/tests/validation/fixtures/PoolingLayerFixture.h +++ b/tests/validation/fixtures/PoolingLayerFixture.h @@ -1,5 +1,5 @@ /* - * Copyright (c) 2017-2021, 2023 Arm Limited. + * Copyright (c) 2017-2021, 2023-2024 Arm Limited. * * SPDX-License-Identifier: MIT * @@ -21,8 +21,8 @@ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE * SOFTWARE. 
*/ -#ifndef ARM_COMPUTE_TEST_POOLING_LAYER_FIXTURE -#define ARM_COMPUTE_TEST_POOLING_LAYER_FIXTURE +#ifndef ACL_TESTS_VALIDATION_FIXTURES_POOLINGLAYERFIXTURE_H +#define ACL_TESTS_VALIDATION_FIXTURES_POOLINGLAYERFIXTURE_H #include "arm_compute/core/TensorShape.h" #include "arm_compute/core/Types.h" @@ -48,6 +48,12 @@ class PoolingLayerValidationGenericFixture : public framework::Fixture void setup(TensorShape shape, PoolingLayerInfo pool_info, DataType data_type, DataLayout data_layout, bool indices = false, QuantizationInfo input_qinfo = QuantizationInfo(), QuantizationInfo output_qinfo = QuantizationInfo(), bool mixed_layout = false) { + if(std::is_same::value && // Cpu + data_type == DataType::F16 && !CPUInfo::get().has_fp16()) + { + return; + } + _mixed_layout = mixed_layout; _pool_info = pool_info; _target = compute_target(shape, pool_info, data_type, data_layout, input_qinfo, output_qinfo, indices); @@ -225,4 +231,4 @@ class GlobalPoolingLayerValidationFixture : public PoolingLayerValidationGeneric } // namespace validation } // namespace test } // namespace arm_compute -#endif /* ARM_COMPUTE_TEST_POOLING_LAYER_FIXTURE */ +#endif // ACL_TESTS_VALIDATION_FIXTURES_POOLINGLAYERFIXTURE_H diff --git a/tests/validation/fixtures/QuantizationLayerFixture.h b/tests/validation/fixtures/QuantizationLayerFixture.h index 1b21967bda..1cc0a56399 100644 --- a/tests/validation/fixtures/QuantizationLayerFixture.h +++ b/tests/validation/fixtures/QuantizationLayerFixture.h @@ -1,5 +1,5 @@ /* - * Copyright (c) 2017-2021, 2023 Arm Limited. + * Copyright (c) 2017-2021, 2023-2024 Arm Limited. * * SPDX-License-Identifier: MIT * @@ -21,8 +21,8 @@ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE * SOFTWARE. 
*/ -#ifndef ARM_COMPUTE_TEST_QUANTIZATION_LAYER_FIXTURE -#define ARM_COMPUTE_TEST_QUANTIZATION_LAYER_FIXTURE +#ifndef ACL_TESTS_VALIDATION_FIXTURES_QUANTIZATIONLAYERFIXTURE_H +#define ACL_TESTS_VALIDATION_FIXTURES_QUANTIZATIONLAYERFIXTURE_H #include "arm_compute/core/TensorShape.h" #include "arm_compute/core/Types.h" @@ -48,6 +48,12 @@ class QuantizationValidationGenericFixture : public framework::Fixture public: void setup(TensorShape shape, DataType data_type_in, DataType data_type_out, QuantizationInfo qinfo, QuantizationInfo qinfo_in) { + if(std::is_same::value && // Cpu + (data_type_in == DataType::F16 || data_type_out == DataType::F16) && !CPUInfo::get().has_fp16()) + { + return; + } + _target = compute_target(shape, data_type_in, data_type_out, qinfo, qinfo_in); _reference = compute_reference(shape, data_type_in, data_type_out, qinfo, qinfo_in); } @@ -116,4 +122,4 @@ class QuantizationValidationFixture : public QuantizationValidationGenericFixtur } // namespace validation } // namespace test } // namespace arm_compute -#endif /* ARM_COMPUTE_TEST_QUANTIZATION_LAYER_FIXTURE */ +#endif // ACL_TESTS_VALIDATION_FIXTURES_QUANTIZATIONLAYERFIXTURE_H diff --git a/tests/validation/fixtures/RNNLayerFixture.h b/tests/validation/fixtures/RNNLayerFixture.h index e9a05e7838..8741ef4fae 100644 --- a/tests/validation/fixtures/RNNLayerFixture.h +++ b/tests/validation/fixtures/RNNLayerFixture.h @@ -1,5 +1,5 @@ /* - * Copyright (c) 2018-2021, 2023 Arm Limited. + * Copyright (c) 2018-2021, 2023-2024 Arm Limited. * * SPDX-License-Identifier: MIT * @@ -21,8 +21,8 @@ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE * SOFTWARE. 
*/ -#ifndef ARM_COMPUTE_TEST_RNN_LAYER_FIXTURE -#define ARM_COMPUTE_TEST_RNN_LAYER_FIXTURE +#ifndef ACL_TESTS_VALIDATION_FIXTURES_RNNLAYERFIXTURE_H +#define ACL_TESTS_VALIDATION_FIXTURES_RNNLAYERFIXTURE_H #include "tests/Globals.h" #include "tests/framework/Asserts.h" @@ -45,6 +45,12 @@ class RNNLayerValidationFixture : public framework::Fixture void setup(TensorShape input_shape, TensorShape weights_shape, TensorShape recurrent_weights_shape, TensorShape bias_shape, TensorShape output_shape, ActivationLayerInfo info, DataType data_type) { + if(std::is_same::value && // Cpu + data_type == DataType::F16 && !CPUInfo::get().has_fp16()) + { + return; + } + _target = compute_target(input_shape, weights_shape, recurrent_weights_shape, bias_shape, output_shape, info, data_type); _reference = compute_reference(input_shape, weights_shape, recurrent_weights_shape, bias_shape, output_shape, info, data_type); } @@ -144,4 +150,4 @@ class RNNLayerValidationFixture : public framework::Fixture } // namespace validation } // namespace test } // namespace arm_compute -#endif /* ARM_COMPUTE_TEST_RNN_LAYER_FIXTURE */ +#endif // ACL_TESTS_VALIDATION_FIXTURES_RNNLAYERFIXTURE_H diff --git a/tests/validation/fixtures/ROIAlignLayerFixture.h b/tests/validation/fixtures/ROIAlignLayerFixture.h index ad76dcbbd9..fd076862dd 100644 --- a/tests/validation/fixtures/ROIAlignLayerFixture.h +++ b/tests/validation/fixtures/ROIAlignLayerFixture.h @@ -1,5 +1,5 @@ /* - * Copyright (c) 2018-2021, 2023 Arm Limited. + * Copyright (c) 2018-2021, 2023-2024 Arm Limited. * * SPDX-License-Identifier: MIT * @@ -21,8 +21,8 @@ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE * SOFTWARE. 
*/ -#ifndef ARM_COMPUTE_TEST_ROIALIGNLAYER_FIXTURE -#define ARM_COMPUTE_TEST_ROIALIGNLAYER_FIXTURE +#ifndef ACL_TESTS_VALIDATION_FIXTURES_ROIALIGNLAYERFIXTURE_H +#define ACL_TESTS_VALIDATION_FIXTURES_ROIALIGNLAYERFIXTURE_H #include "arm_compute/core/TensorShape.h" #include "arm_compute/core/Types.h" @@ -47,6 +47,12 @@ class ROIAlignLayerGenericFixture : public framework::Fixture public: void setup(TensorShape input_shape, const ROIPoolingLayerInfo pool_info, TensorShape rois_shape, DataType data_type, DataLayout data_layout, QuantizationInfo qinfo, QuantizationInfo output_qinfo) { + if(std::is_same::value && // Cpu + data_type == DataType::F16 && !CPUInfo::get().has_fp16()) + { + return; + } + _rois_data_type = is_data_type_quantized_asymmetric(data_type) ? DataType::QASYMM16 : data_type; _target = compute_target(input_shape, data_type, data_layout, pool_info, rois_shape, qinfo, output_qinfo); _reference = compute_reference(input_shape, data_type, pool_info, rois_shape, qinfo, output_qinfo); @@ -209,4 +215,4 @@ class ROIAlignLayerQuantizedFixture : public ROIAlignLayerGenericFixture::value && // Cpu + data_type0 == DataType::F16 && !CPUInfo::get().has_fp16()) + { + return; + } + _target = compute_target(data_type0, qinfo0, start, step); _reference = compute_reference(data_type0, qinfo0, start, step); } @@ -138,4 +144,4 @@ class RangeFixture : public framework::Fixture } // namespace validation } // namespace test } // namespace arm_compute -#endif /* ARM_COMPUTE_TEST_RANGE_FIXTURE */ +#endif // ACL_TESTS_VALIDATION_FIXTURES_RANGEFIXTURE_H diff --git a/tests/validation/fixtures/ReduceMeanFixture.h b/tests/validation/fixtures/ReduceMeanFixture.h index e61941435c..9f18497095 100644 --- a/tests/validation/fixtures/ReduceMeanFixture.h +++ b/tests/validation/fixtures/ReduceMeanFixture.h @@ -1,5 +1,5 @@ /* - * Copyright (c) 2018-2021, 2023 Arm Limited. + * Copyright (c) 2018-2021, 2023-2024 Arm Limited. 
* * SPDX-License-Identifier: MIT * @@ -21,8 +21,8 @@ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE * SOFTWARE. */ -#ifndef ARM_COMPUTE_TEST_REDUCE_MEAN_FIXTURE -#define ARM_COMPUTE_TEST_REDUCE_MEAN_FIXTURE +#ifndef ACL_TESTS_VALIDATION_FIXTURES_REDUCEMEANFIXTURE_H +#define ACL_TESTS_VALIDATION_FIXTURES_REDUCEMEANFIXTURE_H #include "arm_compute/core/TensorShape.h" #include "arm_compute/core/Types.h" @@ -49,6 +49,12 @@ class ReduceMeanValidationFixture : public framework::Fixture public: void setup(TensorShape shape, DataType data_type, Coordinates axis, bool keep_dims, QuantizationInfo quantization_info_input, QuantizationInfo quantization_info_output) { + if(std::is_same::value && // Cpu + data_type == DataType::F16 && !CPUInfo::get().has_fp16()) + { + return; + } + _target = compute_target(shape, data_type, axis, keep_dims, quantization_info_input, quantization_info_output); _reference = compute_reference(shape, data_type, axis, keep_dims, quantization_info_input, quantization_info_output); } @@ -172,4 +178,4 @@ class ReduceMeanFixture : public ReduceMeanValidationFixture::value && // Cpu + data_type == DataType::F16 && !CPUInfo::get().has_fp16()) + { + return; + } + const bool is_arg_min_max = (op == ReductionOperation::ARG_IDX_MAX) || (op == ReductionOperation::ARG_IDX_MIN); _keep_dims = keep_dims && !is_arg_min_max; @@ -166,4 +172,4 @@ class ReductionOperationFixture : public ReductionOperationValidationFixture::value && // Cpu + data_type == DataType::F16 && !CPUInfo::get().has_fp16()) + { + return; + } + _num_dims = shape.num_dimensions(); _target = compute_target(shape, axis_shape, data_type, use_negative_axis, use_inverted_axis); _reference = compute_reference(shape, axis_shape, data_type, use_negative_axis, use_inverted_axis); diff --git a/tests/validation/fixtures/ScaleFixture.h b/tests/validation/fixtures/ScaleFixture.h index 86d89d71f7..03a7ca6ab3 100644 --- a/tests/validation/fixtures/ScaleFixture.h +++ 
b/tests/validation/fixtures/ScaleFixture.h @@ -1,5 +1,5 @@ /* - * Copyright (c) 2017-2023 Arm Limited. + * Copyright (c) 2017-2024 Arm Limited. * * SPDX-License-Identifier: MIT * @@ -42,6 +42,12 @@ class ScaleValidationGenericFixture : public framework::Fixture void setup(TensorShape shape, DataType data_type, QuantizationInfo quantization_info, DataLayout data_layout, InterpolationPolicy policy, BorderMode border_mode, SamplingPolicy sampling_policy, bool align_corners, bool mixed_layout, QuantizationInfo output_quantization_info) { + if(std::is_same::value && // Cpu + data_type == DataType::F16 && !CPUInfo::get().has_fp16()) + { + return; + } + _shape = shape; _policy = policy; _border_mode = border_mode; diff --git a/tests/validation/fixtures/SelectFixture.h b/tests/validation/fixtures/SelectFixture.h index 8cb6f062f9..eef86b808e 100644 --- a/tests/validation/fixtures/SelectFixture.h +++ b/tests/validation/fixtures/SelectFixture.h @@ -1,5 +1,5 @@ /* - * Copyright (c) 2018-2021, 2023 Arm Limited. + * Copyright (c) 2018-2021, 2023-2024 Arm Limited. * * SPDX-License-Identifier: MIT * @@ -21,8 +21,8 @@ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE * SOFTWARE. 
*/ -#ifndef ARM_COMPUTE_TEST_SELECT_FIXTURE -#define ARM_COMPUTE_TEST_SELECT_FIXTURE +#ifndef ACL_TESTS_VALIDATION_FIXTURES_SELECTFIXTURE_H +#define ACL_TESTS_VALIDATION_FIXTURES_SELECTFIXTURE_H #include "arm_compute/core/TensorShape.h" #include "arm_compute/core/Types.h" @@ -65,6 +65,12 @@ class SelectValidationFixture : public framework::Fixture public: void setup(TensorShape shape, bool has_same_same_rank, DataType data_type) { + if(std::is_same::value && // Cpu + data_type == DataType::F16 && !CPUInfo::get().has_fp16()) + { + return; + } + TensorShape condition_shape = detail::select_condition_shape(shape, has_same_same_rank); _target = compute_target(shape, condition_shape, data_type); @@ -144,4 +150,4 @@ class SelectValidationFixture : public framework::Fixture } // namespace validation } // namespace test } // namespace arm_compute -#endif /* ARM_COMPUTE_TEST_SELECT_FIXTURE */ +#endif // ACL_TESTS_VALIDATION_FIXTURES_SELECTFIXTURE_H diff --git a/tests/validation/fixtures/SliceOperationsFixtures.h b/tests/validation/fixtures/SliceOperationsFixtures.h index b1f91ea2e0..65b8fb88d2 100644 --- a/tests/validation/fixtures/SliceOperationsFixtures.h +++ b/tests/validation/fixtures/SliceOperationsFixtures.h @@ -1,5 +1,5 @@ /* - * Copyright (c) 2018-2021, 2023 Arm Limited. + * Copyright (c) 2018-2021, 2023-2024 Arm Limited. * * SPDX-License-Identifier: MIT * @@ -21,8 +21,8 @@ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE * SOFTWARE. 
*/ -#ifndef ARM_COMPUTE_TEST_SLICE_OPERATIONS_FIXTURE -#define ARM_COMPUTE_TEST_SLICE_OPERATIONS_FIXTURE +#ifndef ACL_TESTS_VALIDATION_FIXTURES_SLICEOPERATIONSFIXTURES_H +#define ACL_TESTS_VALIDATION_FIXTURES_SLICEOPERATIONSFIXTURES_H #include "arm_compute/core/TensorShape.h" #include "arm_compute/core/Types.h" @@ -47,6 +47,12 @@ class SliceFixture : public framework::Fixture public: void setup(TensorShape shape, Coordinates starts, Coordinates ends, DataType data_type) { + if(std::is_same::value && // Cpu + data_type == DataType::F16 && !CPUInfo::get().has_fp16()) + { + return; + } + _target = compute_target(shape, starts, ends, data_type); _reference = compute_reference(shape, starts, ends, data_type); } @@ -112,6 +118,12 @@ class StridedSliceFixture : public framework::Fixture int32_t begin_mask, int32_t end_mask, int32_t shrink_mask, DataType data_type) { + if(std::is_same::value && // Cpu + data_type == DataType::F16 && !CPUInfo::get().has_fp16()) + { + return; + } + _target = compute_target(shape, starts, ends, strides, begin_mask, end_mask, shrink_mask, data_type); _reference = compute_reference(shape, starts, ends, strides, begin_mask, end_mask, shrink_mask, data_type); } @@ -176,4 +188,4 @@ class StridedSliceFixture : public framework::Fixture } // namespace validation } // namespace test } // namespace arm_compute -#endif /* ARM_COMPUTE_TEST_SLICE_OPERATIONS_FIXTURE */ +#endif // ACL_TESTS_VALIDATION_FIXTURES_SLICEOPERATIONSFIXTURES_H diff --git a/tests/validation/fixtures/SoftmaxLayerFixture.h b/tests/validation/fixtures/SoftmaxLayerFixture.h index f4bf8df9c0..399a8b70c4 100644 --- a/tests/validation/fixtures/SoftmaxLayerFixture.h +++ b/tests/validation/fixtures/SoftmaxLayerFixture.h @@ -1,5 +1,5 @@ /* - * Copyright (c) 2017-2021, 2023 Arm Limited. + * Copyright (c) 2017-2021, 2023-2024 Arm Limited. * * SPDX-License-Identifier: MIT * @@ -21,8 +21,8 @@ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE * SOFTWARE. 
*/ -#ifndef ARM_COMPUTE_TEST_SOFTMAX_LAYER_FIXTURE -#define ARM_COMPUTE_TEST_SOFTMAX_LAYER_FIXTURE +#ifndef ACL_TESTS_VALIDATION_FIXTURES_SOFTMAXLAYERFIXTURE_H +#define ACL_TESTS_VALIDATION_FIXTURES_SOFTMAXLAYERFIXTURE_H #include "arm_compute/core/TensorShape.h" #include "arm_compute/core/Types.h" @@ -48,6 +48,12 @@ class SoftmaxValidationGenericFixture : public framework::Fixture public: void setup(TensorShape shape, DataType data_type, QuantizationInfo quantization_info, float beta, size_t axis) { + if(std::is_same::value && // Cpu + data_type == DataType::F16 && !CPUInfo::get().has_fp16()) + { + return; + } + _quantization_info = quantization_info; _reference = compute_reference(shape, data_type, quantization_info, beta, axis); @@ -157,4 +163,4 @@ class SoftmaxValidationQuantizedFixture : public SoftmaxValidationGenericFixture } // namespace validation } // namespace test } // namespace arm_compute -#endif /* ARM_COMPUTE_TEST_SOFTMAX_LAYER_FIXTURE */ +#endif // ACL_TESTS_VALIDATION_FIXTURES_SOFTMAXLAYERFIXTURE_H diff --git a/tests/validation/fixtures/SplitFixture.h b/tests/validation/fixtures/SplitFixture.h index 203925329c..79ce152671 100644 --- a/tests/validation/fixtures/SplitFixture.h +++ b/tests/validation/fixtures/SplitFixture.h @@ -1,5 +1,5 @@ /* - * Copyright (c) 2018-2021, 2023 Arm Limited. + * Copyright (c) 2018-2021, 2023-2024 Arm Limited. * * SPDX-License-Identifier: MIT * @@ -21,8 +21,8 @@ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE * SOFTWARE. 
*/ -#ifndef ARM_COMPUTE_TEST_SPLIT_FIXTURE -#define ARM_COMPUTE_TEST_SPLIT_FIXTURE +#ifndef ACL_TESTS_VALIDATION_FIXTURES_SPLITFIXTURE_H +#define ACL_TESTS_VALIDATION_FIXTURES_SPLITFIXTURE_H #include "arm_compute/core/TensorShape.h" #include "arm_compute/core/Types.h" @@ -49,6 +49,12 @@ class SplitFixture : public framework::Fixture public: void setup(TensorShape shape, unsigned int axis, unsigned int splits, DataType data_type) { + if(std::is_same::value && // Cpu + data_type == DataType::F16 && !CPUInfo::get().has_fp16()) + { + return; + } + _target = compute_target(shape, axis, splits, data_type); _reference = compute_reference(shape, axis, splits, data_type); } @@ -150,6 +156,12 @@ class SplitShapesFixture : public framework::Fixture public: void setup(TensorShape shape, unsigned int axis, std::vector split_shapes, DataType data_type) { + if(std::is_same::value && // Cpu + data_type == DataType::F16 && !CPUInfo::get().has_fp16()) + { + return; + } + _target = compute_target(shape, axis, split_shapes, data_type); _reference = compute_reference(shape, axis, split_shapes, data_type); } @@ -254,4 +266,4 @@ class SplitShapesFixture : public framework::Fixture } // namespace validation } // namespace test } // namespace arm_compute -#endif /* ARM_COMPUTE_TEST_SPLIT_FIXTURE */ +#endif // ACL_TESTS_VALIDATION_FIXTURES_SPLITFIXTURE_H diff --git a/tests/validation/fixtures/UnstackFixture.h b/tests/validation/fixtures/UnstackFixture.h index 30b7dd5539..b543ea263c 100644 --- a/tests/validation/fixtures/UnstackFixture.h +++ b/tests/validation/fixtures/UnstackFixture.h @@ -1,5 +1,5 @@ /* - * Copyright (c) 2018-2021, 2023 Arm Limited. + * Copyright (c) 2018-2021, 2023-2024 Arm Limited. * * SPDX-License-Identifier: MIT * @@ -21,8 +21,8 @@ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE * SOFTWARE. 
*/ -#ifndef ARM_COMPUTE_TEST_UNSTACK_FIXTURE -#define ARM_COMPUTE_TEST_UNSTACK_FIXTURE +#ifndef ACL_TESTS_VALIDATION_FIXTURES_UNSTACKFIXTURE_H +#define ACL_TESTS_VALIDATION_FIXTURES_UNSTACKFIXTURE_H #include "arm_compute/core/TensorShape.h" #include "arm_compute/core/Types.h" @@ -49,6 +49,12 @@ class UnstackValidationFixture : public framework::Fixture public: void setup(TensorShape input_shape, int axis, int num, DataType data_type) { + if(std::is_same::value && // Cpu + data_type == DataType::F16 && !CPUInfo::get().has_fp16()) + { + return; + } + _target = compute_target(input_shape, axis, num, data_type); _reference = compute_reference(input_shape, axis, num, data_type); } @@ -114,4 +120,4 @@ class UnstackValidationFixture : public framework::Fixture } // namespace validation } // namespace test } // namespace arm_compute -#endif /* ARM_COMPUTE_TEST_UNSTACK_FIXTURE */ +#endif // ACL_TESTS_VALIDATION_FIXTURES_UNSTACKFIXTURE_H diff --git a/tests/validation/fixtures/WinogradConvolutionLayerFixture.h b/tests/validation/fixtures/WinogradConvolutionLayerFixture.h index 20b678b36c..4d165a6563 100644 --- a/tests/validation/fixtures/WinogradConvolutionLayerFixture.h +++ b/tests/validation/fixtures/WinogradConvolutionLayerFixture.h @@ -59,6 +59,12 @@ class WinogradConvolutionLayerFastMathValidationFixture : public framework::Fixt DataType data_type, ActivationLayerInfo act_info, const DataLayout &data_layout) { + if(std::is_same::value && // Cpu + data_type == DataType::F16 && !CPUInfo::get().has_fp16()) + { + return; + } + ARM_COMPUTE_UNUSED(dilation); _mixed_layout = mixed_layout; _target = compute_target(input_shape, weights_shape, bias_shape, output_shape, info, data_type, act_info, data_layout); @@ -244,6 +250,12 @@ class WinogradInputTransformValidationFixture : public framework::Fixture public: void setup(TensorShape input_shape, WinogradInfo winograd_info, DataLayout data_layout, DataType data_type) { + if(std::is_same::value && // Cpu + data_type == DataType::F16 
&& !CPUInfo::get().has_fp16()) + { + return; + } + TensorShape output_shape = compute_winograd_input_transform_shape(TensorInfo(input_shape, 1, data_type), winograd_info); _mixed_layout = mixed_layout; _target = compute_target(input_shape, output_shape, winograd_info, data_layout, data_type); @@ -355,6 +367,12 @@ class WinogradFilterTransformValidationFixture : public framework::Fixture public: void setup(TensorShape input_shape, Size2D output_tile, DataLayout data_layout, DataType data_type) { + if(std::is_same::value && // Cpu + data_type == DataType::F16 && !CPUInfo::get().has_fp16()) + { + return; + } + WinogradInfo winograd_info(output_tile, Size2D(input_shape[0], input_shape[1]), Size2D() /* Not needed */, PadStrideInfo() /* Not needed */, DataLayout::NCHW /* Not needed */); TensorShape output_shape = compute_winograd_filter_transform_shape(TensorInfo(input_shape, 1, data_type), winograd_info); @@ -469,6 +487,12 @@ class WinogradOutputTransformValidationFixture : public framework::Fixture public: void setup(TensorShape input_shape, WinogradInfo winograd_info, DataType data_type, ActivationLayerInfo act_info = ActivationLayerInfo()) { + if(std::is_same::value && // Cpu + data_type == DataType::F16 && !CPUInfo::get().has_fp16()) + { + return; + } + _target = compute_target(input_shape, winograd_info, data_type, act_info); _reference = compute_reference(input_shape, winograd_info, data_type, act_info); } diff --git a/tests/validation/fixtures/dynamic_fusion/gpu/cl/MatMulKernelFixture.h b/tests/validation/fixtures/dynamic_fusion/gpu/cl/MatMulKernelFixture.h index 4c1cc94d3d..0ab90c675f 100644 --- a/tests/validation/fixtures/dynamic_fusion/gpu/cl/MatMulKernelFixture.h +++ b/tests/validation/fixtures/dynamic_fusion/gpu/cl/MatMulKernelFixture.h @@ -255,7 +255,6 @@ class DynamicFusionGpuMatMulValidationGenericFixture : public framework::Fixture // We reshape the gemm output back if the tensor is high dimensional if (output_shape_collapsed != output_shape) { - // 
std::cout << "called reshape: \n"; result = reference::reshape_layer(result, output_shape); } diff --git a/tests/validation/reference/Conv3D.cpp b/tests/validation/reference/Conv3D.cpp index e4010a507a..38472a9aec 100644 --- a/tests/validation/reference/Conv3D.cpp +++ b/tests/validation/reference/Conv3D.cpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2021, 2023 Arm Limited. + * Copyright (c) 2021, 2023-2024 Arm Limited. * * SPDX-License-Identifier: MIT * @@ -58,7 +58,7 @@ inline bool is_valid_pixel(int i, int min, int max) } // Evaluate the weights against an element in a given tensor. -template < typename T, typename TB, typename std::enable_if < validation::is_floating_point::value &&validation::is_floating_point::value, int >::type = 0 > +template < typename T, typename TB, typename TACC, typename std::enable_if < validation::is_floating_point::value &&validation::is_floating_point::value, int >::type = 0 > T calculate_conv3d(const SimpleTensor &src, const SimpleTensor &weights, const SimpleTensor &bias, const Size3D &dilation, int batch, int z_start, int y_start, int x_start, int ch_out, UniformQuantizationInfo oq_info) { @@ -73,7 +73,7 @@ T calculate_conv3d(const SimpleTensor &src, const SimpleTensor &weights, c const unsigned int src_height = src.shape()[height_dim]; const unsigned int src_depth = src.shape()[depth_dim]; - T total(0); + TACC total(0); for(unsigned int weight_d = 0; weight_d < weights_depth; ++weight_d) { const int idx_z = z_start + dilation.depth * weight_d; @@ -112,10 +112,10 @@ T calculate_conv3d(const SimpleTensor &src, const SimpleTensor &weights, c const TB *b_ptr = bias.data(); TB bias_value = b_ptr[ch_out]; - return total + bias_value; + return static_cast(total) + bias_value; } -template < typename T, typename TB, ARM_COMPUTE_REQUIRES_TA(std::is_same::value || std::is_same::value) > +template < typename T, typename TB, typename TACC, ARM_COMPUTE_REQUIRES_TA(std::is_same::value || std::is_same::value) > T calculate_conv3d(const SimpleTensor 
&src, const SimpleTensor &weights, const SimpleTensor &bias, const Size3D &dilation, int batch, int z_start, int y_start, int x_start, int ch_out, UniformQuantizationInfo oq_info) { @@ -143,7 +143,7 @@ T calculate_conv3d(const SimpleTensor &src, const SimpleTensor &weights, c const float multiplier = input_scale * weights_scale / output_scale; arm_compute::quantization::calculate_quantized_multiplier(multiplier, &output_multiplier, &output_shift); - int32_t total(0); + TACC total(0); for(unsigned int weight_d = 0; weight_d < weights_depth; ++weight_d) { const int idx_z = z_start + dilation.depth * weight_d; @@ -189,7 +189,7 @@ T calculate_conv3d(const SimpleTensor &src, const SimpleTensor &weights, c } } // namespace -template +template SimpleTensor conv3d(const SimpleTensor &src, const SimpleTensor &weights, const SimpleTensor &bias, SimpleTensor &dst, const Conv3dInfo &conv3d_info) { // Compute reference @@ -237,7 +237,7 @@ SimpleTensor conv3d(const SimpleTensor &src, const SimpleTensor &weight T *out_ptr = dst.data(); const int out_offset = coord2index(dst.shape(), Coordinates{ ch_out, x_out, y_out, z_out, batch }); - out_ptr[out_offset] = calculate_conv3d(src, weights, bias, conv3d_info.dilation, batch, z_start, y_start, x_start, ch_out, dst.quantization_info().uniform()); + out_ptr[out_offset] = calculate_conv3d(src, weights, bias, conv3d_info.dilation, batch, z_start, y_start, x_start, ch_out, dst.quantization_info().uniform()); } } } @@ -246,13 +246,13 @@ SimpleTensor conv3d(const SimpleTensor &src, const SimpleTensor &weight return dst; } -template SimpleTensor conv3d(const SimpleTensor &src, const SimpleTensor &weights, const SimpleTensor &bias, SimpleTensor &dst, +template SimpleTensor conv3d(const SimpleTensor &src, const SimpleTensor &weights, const SimpleTensor &bias, SimpleTensor &dst, const Conv3dInfo &conv3d_info); -template SimpleTensor conv3d(const SimpleTensor &src, const SimpleTensor &weights, const SimpleTensor &bias, SimpleTensor &dst, 
+template SimpleTensor conv3d(const SimpleTensor &src, const SimpleTensor &weights, const SimpleTensor &bias, SimpleTensor &dst, const Conv3dInfo &conv3d_info); -template SimpleTensor conv3d(const SimpleTensor &src, const SimpleTensor &weights, const SimpleTensor &bias, SimpleTensor &dst, +template SimpleTensor conv3d(const SimpleTensor &src, const SimpleTensor &weights, const SimpleTensor &bias, SimpleTensor &dst, const Conv3dInfo &conv3d_info); -template SimpleTensor conv3d(const SimpleTensor &src, const SimpleTensor &weights, const SimpleTensor &bias, SimpleTensor &dst, +template SimpleTensor conv3d(const SimpleTensor &src, const SimpleTensor &weights, const SimpleTensor &bias, SimpleTensor &dst, const Conv3dInfo &conv3d_info); } // namespace reference } // namespace validation diff --git a/tests/validation/reference/Conv3D.h b/tests/validation/reference/Conv3D.h index e3674f4bfb..a440b15d55 100644 --- a/tests/validation/reference/Conv3D.h +++ b/tests/validation/reference/Conv3D.h @@ -1,5 +1,5 @@ /* - * Copyright (c) 2021 Arm Limited. + * Copyright (c) 2021, 2024 Arm Limited. * * SPDX-License-Identifier: MIT * @@ -21,8 +21,8 @@ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE * SOFTWARE. 
*/ -#ifndef ARM_COMPUTE_TEST_CONV3D_LAYER_H -#define ARM_COMPUTE_TEST_CONV3D_LAYER_H +#ifndef ACL_TESTS_VALIDATION_REFERENCE_CONV3D_H +#define ACL_TESTS_VALIDATION_REFERENCE_CONV3D_H #include "Utils.h" #include "arm_compute/runtime/FunctionDescriptors.h" @@ -37,11 +37,11 @@ namespace validation { namespace reference { -template +template SimpleTensor conv3d(const SimpleTensor &src, const SimpleTensor &weights, const SimpleTensor &bias, SimpleTensor &dst, const Conv3dInfo &conv3d_info); } // namespace reference } // namespace validation } // namespace test } // namespace arm_compute -#endif /* ARM_COMPUTE_TEST_CONV3D_LAYER_H */ +#endif // ACL_TESTS_VALIDATION_REFERENCE_CONV3D_H diff --git a/tests/validation/reference/MeanStdDevNormalizationLayer.cpp b/tests/validation/reference/MeanStdDevNormalizationLayer.cpp index a7c8a784d9..97a7adaf54 100644 --- a/tests/validation/reference/MeanStdDevNormalizationLayer.cpp +++ b/tests/validation/reference/MeanStdDevNormalizationLayer.cpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2019, 2022 Arm Limited. + * Copyright (c) 2019, 2022, 2024 Arm Limited. 
* * SPDX-License-Identifier: MIT * @@ -36,29 +36,27 @@ namespace reference template SimpleTensor mean_std_normalization_layer(const SimpleTensor &src, float epsilon) { - // Create reference - SimpleTensor dst{ src.shape(), src.data_type(), 1 }; - - const int cols = src.shape()[0]; - const int batch_size = src.shape()[1]; - - for(int i = 0; i < batch_size; ++i) - { - T sum = static_cast(0.f); - T sum_sq = static_cast(0.f); - for(int j = 0; j < cols; ++j) - { - const T value = src[j + i * cols]; - sum += value; - sum_sq += value * value; - } - const T mean = sum / static_cast(cols); - const T var = ((sum_sq / static_cast(cols)) - (mean * mean)) + static_cast(epsilon); - const T stddev_inv = static_cast(1.f) / static_cast(std::sqrt(var)); - for(int j = 0; j < cols; ++j) - { - dst[j + i * cols] = (src[j + i * cols] - mean) * stddev_inv; - } + SimpleTensor dst{ src.shape(), src.data_type(), 1 }; + const int cols = src.shape()[0]; + const int batch_size = src.shape()[1]; + for(int i = 0; i < batch_size; ++i) + { + float sum = static_cast(0.f); + float sum_sq = static_cast(0.f); + for(int j = 0; j < cols; ++j) + { + const T value = src[j + i * cols]; + sum += value; + sum_sq += value * value; + } + const float mean = sum / cols; + const float var = (((sum_sq / cols) - (mean * mean)) + epsilon); + const float stddev_inv = 1.f / std::sqrt(var); + for(int j = 0; j < cols; ++j) + { + const float res = (src[j + i * cols] - mean) * stddev_inv; + dst[j + i * cols] = static_cast(res); + } } return dst; } diff --git a/tests/validation/reference/Permute.cpp b/tests/validation/reference/Permute.cpp index 7aa3011d8f..c06bc752cb 100644 --- a/tests/validation/reference/Permute.cpp +++ b/tests/validation/reference/Permute.cpp @@ -67,6 +67,9 @@ template SimpleTensor permute(const SimpleTensor &src, Permu template SimpleTensor permute(const SimpleTensor &src, PermutationVector perm); template SimpleTensor permute(const SimpleTensor &src, PermutationVector perm); template SimpleTensor 
permute(const SimpleTensor &src, PermutationVector perm); +#ifdef ARM_COMPUTE_ENABLE_FP16 +template SimpleTensor permute(const SimpleTensor &src, PermutationVector perm); +#endif /* ARM_COMPUTE_ENABLE_FP16 */ } // namespace reference } // namespace validation } // namespace test diff --git a/tests/validation/runtime/experimental/operators/CpuActivation.cpp b/tests/validation/runtime/experimental/operators/CpuActivation.cpp new file mode 100644 index 0000000000..8b52cc7ffc --- /dev/null +++ b/tests/validation/runtime/experimental/operators/CpuActivation.cpp @@ -0,0 +1,124 @@ +/* + * Copyright (c) 2024 Arm Limited. + * + * SPDX-License-Identifier: MIT + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal in the Software without restriction, including without limitation the + * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + * sell copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. 
+ */ +#include "arm_compute/runtime/experimental/operators/CpuActivation.h" +#include "tests/NEON/Accessor.h" +#include "tests/datasets/ActivationFunctionsDataset.h" +#include "tests/datasets/ShapeDatasets.h" +#include "tests/framework/Macros.h" +#include "tests/validation/Validation.h" +#include "tests/validation/fixtures/CpuActivationFixture.h" + +/* + * Tests for arm_compute::experimental::op::CpuActivation which is a shallow wrapper for + * arm_compute::cpu::CpuActivation. Any future testing to the functionalities of cpu::CpuActivation + * will be tested in tests/NEON/ActivationLayer.cpp given that op::CpuActivation remain a + * shallow wrapper. +*/ + +namespace arm_compute +{ +namespace test +{ +namespace validation +{ +namespace +{ +/** Define relative tolerance of the activation layer. + * + * @param[in] activation The activation function used. + * + * @return Relative tolerance depending on the activation function. + */ +RelativeTolerance relative_tolerance(ActivationLayerInfo::ActivationFunction activation) +{ + switch(activation) + { + case ActivationLayerInfo::ActivationFunction::LOGISTIC: + case ActivationLayerInfo::ActivationFunction::ELU: + case ActivationLayerInfo::ActivationFunction::SQRT: + case ActivationLayerInfo::ActivationFunction::TANH: + case ActivationLayerInfo::ActivationFunction::HARD_SWISH: + case ActivationLayerInfo::ActivationFunction::SWISH: + case ActivationLayerInfo::ActivationFunction::GELU: + return RelativeTolerance(0.05f); + case ActivationLayerInfo::ActivationFunction::SOFT_RELU: + return RelativeTolerance(0.00001f); + default: + return RelativeTolerance(0.f); + } +} + +/** Define absolute tolerance of the activation layer. + * + * @param[in] activation The activation function used. + * + * @return Absolute tolerance depending on the activation function. 
+ */ +AbsoluteTolerance absolute_tolerance(ActivationLayerInfo::ActivationFunction activation) +{ + switch(activation) + { + case ActivationLayerInfo::ActivationFunction::LOGISTIC: + case ActivationLayerInfo::ActivationFunction::SQRT: + case ActivationLayerInfo::ActivationFunction::TANH: + case ActivationLayerInfo::ActivationFunction::SWISH: + case ActivationLayerInfo::ActivationFunction::HARD_SWISH: + case ActivationLayerInfo::ActivationFunction::SOFT_RELU: + return AbsoluteTolerance(0.00001f); + default: + return AbsoluteTolerance(0.f); + } +} + +const auto NeonActivationFunctionsDataset = concat(datasets::ActivationFunctions(), + framework::dataset::make("ActivationFunction", { ActivationLayerInfo::ActivationFunction::HARD_SWISH, ActivationLayerInfo::ActivationFunction::SWISH })); + +/** Input data sets. */ +const auto ActivationDataset = combine(combine(framework::dataset::make("InPlace", { false, true }), NeonActivationFunctionsDataset), framework::dataset::make("AlphaBeta", { 0.5f, 1.f })); + +} // namespace + +TEST_SUITE(NEON) +TEST_SUITE(OPERATORS) +TEST_SUITE(CpuActivation) + +template +using CpuActivationFixture = CpuActivationValidationFixture; + +TEST_SUITE(SmokeTest) +FIXTURE_DATA_TEST_CASE(SmokeTest, CpuActivationFixture, framework::DatasetMode::ALL, combine(combine(datasets::SmallShapes(), ActivationDataset), framework::dataset::make("DataType", + DataType::F32))) + +{ + // Validate output + validate(Accessor(_target), _reference, relative_tolerance(_function), 0.f, absolute_tolerance(_function)); +} +TEST_SUITE_END() // SmokeTest + +TEST_SUITE_END() // CpuActivation +TEST_SUITE_END() // OPERATORS +TEST_SUITE_END() // NEON +} // namespace validation +} // namespace test +} // namespace arm_compute diff --git a/tests/validation/runtime/experimental/operators/CpuGemm.cpp b/tests/validation/runtime/experimental/operators/CpuGemm.cpp new file mode 100644 index 0000000000..c6df429a4d --- /dev/null +++ 
b/tests/validation/runtime/experimental/operators/CpuGemm.cpp @@ -0,0 +1,143 @@ +/* + * Copyright (c) 2024 Arm Limited. + * + * SPDX-License-Identifier: MIT + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal in the Software without restriction, including without limitation the + * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + * sell copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + */ +#include "arm_compute/runtime/experimental/operators/CpuGemm.h" +#include "src/core/helpers/MemoryHelpers.h" +#include "tests/NEON/Accessor.h" +#include "tests/framework/Macros.h" +#include "tests/framework/datasets/Datasets.h" +#include "tests/validation/fixtures/GEMMFixture.h" + +/* + * Tests for arm_compute::experimental::ops::CpuGemm which is a shallow wrapper for + * arm_compute::cpu::CpuGemm. Any future testing to the functionalities of cpu::CpuGemm will + * be tested in tests/NEON/GEMM.cpp given that ops::CpuGemm remain a shallow wrapper. 
+*/ + +namespace arm_compute +{ +namespace test +{ +namespace validation +{ +using framework::dataset::make; + +namespace +{ +/** CNN data types */ +const auto CNNDataTypes = make("DataType", +{ + DataType::F32, +}); +} // namespace + +TEST_SUITE(NEON) +TEST_SUITE(OPERATORS) + +TEST_SUITE(CPUGEMM) +/** Test case for memory injection in @ref arm_compute::experimental::ops::CpuGemm. + * + * Configure the operator once and inject memory at run-time in multiple executions. + * + * Checks performed in order: + * - Both runs compute the same output + */ +TEST_CASE(OpsCpuGemmMemoryInjection, framework::DatasetMode::ALL) +{ + auto gemm = std::make_unique(); + const auto lhs_info = TensorInfo(TensorShape(3U, 3U), 1, DataType::F32); + const auto rhs_info = TensorInfo(TensorShape(4U, 3U), 1, DataType::F32); + const auto c_info = TensorInfo(TensorShape(4U, 3U), 1, DataType::F32); + auto dst_info = TensorInfo(TensorShape(4U, 3U), 1, DataType::F32); + const auto gemm_info = GEMMInfo{}; + gemm->configure(&lhs_info, &rhs_info, &c_info, &dst_info, 1.f, 1.f, gemm_info); + + // tensors are newly created every call of this lambda function + auto lhs = create_tensor(lhs_info); + auto rhs = create_tensor(rhs_info); + auto c = create_tensor(c_info); + lhs.allocator()->allocate(); + rhs.allocator()->allocate(); + c.allocator()->allocate(); + + ITensorPack run_pack{ { TensorType::ACL_SRC_0, &lhs }, { TensorType::ACL_SRC_1, &rhs }, { TensorType::ACL_SRC_2, &c } }; + ITensorPack prep_pack{ { TensorType::ACL_SRC_1, &rhs }, { TensorType::ACL_SRC_2, &c } }; + + auto mg = MemoryGroup{}; + auto ws = manage_workspace(gemm->workspace(), mg, run_pack, prep_pack); + + auto run_conv = [&]() -> Tensor + { + auto dst = create_tensor(dst_info); + dst.allocator()->allocate(); + run_pack.add_tensor(TensorType::ACL_DST, &dst); + + library->fill_tensor_value(Accessor(lhs), 1.f); + library->fill_tensor_value(Accessor(rhs), 2.f); + library->fill_tensor_value(Accessor(c), 3.f); + // This operator is configured
once and captured by this lambda. + gemm->prepare(prep_pack); + gemm->run(run_pack); + return dst; + }; + auto result_0 = run_conv(); + auto result_1 = run_conv(); + for(size_t i = 0; i < result_0.info()->tensor_shape().total_size(); ++i) + { + ARM_COMPUTE_EXPECT(((float *)result_0.buffer())[i] == ((float *)result_1.buffer())[i], framework::LogLevel::ERRORS); + } +} + +DATA_TEST_CASE(OpsCpuGemmValidateAccumulate, framework::DatasetMode::ALL, combine( + zip(make("In0",{ TensorShape(21U, 13U) }), + make("In1", { TensorShape(33U, 21U) }), + make("Dst", { TensorShape(33U, 13U) })), + zip( + make("alpha", { 1.0, 100.0, 1.0, 1.0 }), + make("beta", { 0.0, 0.0, 1.0, 1.0 }), + make("is_c_null", { false, false, false, true }), + make("Expected", { true, false, false, true }))), + shape_a, shape_b, shape_dst, alpha, beta, is_c_null, expected) +{ + /* Accumulation test for GEMM kernels */ + // Create tensors + TensorInfo in_a(shape_a, 1, DataType::F32); + TensorInfo in_b(shape_b, 1, DataType::F32); + TensorInfo in_c(shape_dst, 1, DataType::F32); + TensorInfo dst(shape_dst, 1, DataType::F32); + + GEMMInfo gemm_info = GEMMInfo(); + gemm_info.set_accumulate(true); + + // Validate accumulation + arm_compute::experimental::ops::CpuGemm gemm; + Status status = gemm.validate(&in_a, &in_b, (is_c_null ? nullptr : &in_c), &dst, alpha, beta, gemm_info); + ARM_COMPUTE_EXPECT((expected == bool(status)), framework::LogLevel::ERRORS); +} + +TEST_SUITE_END() // CPUGEMM +TEST_SUITE_END() // OPERATORS +TEST_SUITE_END() // NEON +} // namespace validation +} // namespace test +} // namespace arm_compute diff --git a/tests/validation/runtime/experimental/operators/CpuGemmConv2d.cpp b/tests/validation/runtime/experimental/operators/CpuGemmConv2d.cpp new file mode 100644 index 0000000000..81623aa702 --- /dev/null +++ b/tests/validation/runtime/experimental/operators/CpuGemmConv2d.cpp @@ -0,0 +1,136 @@ +/* + * Copyright (c) 2017-2024 Arm Limited. 
+ * + * SPDX-License-Identifier: MIT + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal in the Software without restriction, including without limitation the + * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + * sell copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + */ +#include "arm_compute/runtime/experimental/operators/CpuGemmConv2d.h" + +#include "arm_compute/core/CoreTypes.h" + +#include "src/core/helpers/MemoryHelpers.h" +#include "tests/datasets/TinyConvolutionLayerDataset.h" +#include "tests/framework/Asserts.h" +#include "tests/framework/datasets/Datasets.h" +#include "tests/framework/Macros.h" +#include "tests/Globals.h" +#include "tests/NEON/Accessor.h" +#include "tests/Utils.h" +#include "tests/validation/fixtures/ConvolutionLayerFixture.h" +#include "tests/validation/fixtures/CpuGemmConv2dFixture.h" +#include "tests/validation/Validation.h" +/* + * Tests for arm_compute::experimental::op::CpuGemmConv2d which is a shallow wrapper for + * arm_compute::cpu::CpuGemmConv2d.
Any future testing to the functionalities of cpu::CpuGemmConv2d will + * be tested in tests/validation/NEON/ConvolutionLayer.cpp given that op::CpuGemmConv2d remain a shallow wrapper. +*/ + +namespace arm_compute +{ +namespace test +{ +namespace validation +{ +namespace +{ +const RelativeTolerance rel_tolerance_f32(0.01f); +} + +TEST_SUITE(NEON) +TEST_SUITE(OPERATORS) +TEST_SUITE(CpuGemmConv2d) +/** Test case for memory injection in @ref arm_compute::experimental::op::CpuGemmConv2d. + * + * Configure the operator once and inject memory at run-time in multiple executions. + * + * Checks performed in order: + * - Both runs compute the same output + */ +TEST_CASE(OpCpuGemmConv2dMemoryInjection, framework::DatasetMode::ALL) +{ + auto conv = std::make_unique(); + + const auto src_info = TensorInfo(TensorShape(1U, 5U, 2U), 1, DataType::F32, DataLayout::NCHW); + const auto weights_info = TensorInfo(TensorShape(1U, 3U, 2U, 3U), 1, DataType::F32, DataLayout::NCHW); + const auto biases_info = TensorInfo(TensorShape(3U), 1, DataType::F32, DataLayout::NCHW); + auto dst_info = TensorInfo(TensorShape(1U, 7U, 3U), 1, DataType::F32, DataLayout::NCHW); + const auto pad_info = PadStrideInfo(1, 1, 0, 0, 2, 2, DimensionRoundingType::FLOOR); + + conv->configure(&src_info, &weights_info, &biases_info, &dst_info, pad_info); + auto const status = conv->validate(&src_info, &weights_info, &biases_info, &dst_info, pad_info); + ARM_COMPUTE_ASSERT(status); + + auto src = create_tensor(src_info); + auto weights = create_tensor(weights_info); + auto biases = create_tensor(biases_info); + + src.allocator()->allocate(); + weights.allocator()->allocate(); + biases.allocator()->allocate(); + + ITensorPack run_pack{ + {TensorType::ACL_SRC_0, &src}, {TensorType::ACL_SRC_1, &weights}, {TensorType::ACL_SRC_2, &biases}}; + ITensorPack prep_pack{{TensorType::ACL_SRC_1, &weights}, {TensorType::ACL_SRC_2, &biases}}; + + auto mg = MemoryGroup{}; + auto ws = manage_workspace(conv->workspace(), mg, run_pack, 
prep_pack); + + auto run_conv = [&]() -> Tensor + { + auto dst = create_tensor(dst_info); + dst.allocator()->allocate(); + run_pack.add_tensor(TensorType::ACL_DST, &dst); + + library->fill_tensor_value(Accessor(src), 1.f); + library->fill_tensor_value(Accessor(weights), 2.f); + library->fill_tensor_value(Accessor(biases), 3.f); + // This operator is configured once and captured by this lambda. + conv->prepare(prep_pack); + conv->run(run_pack); + return dst; + }; + auto result_0 = run_conv(); + auto result_1 = run_conv(); + for (size_t i = 0; i < result_0.info()->tensor_shape().total_size(); ++i) + { + ARM_COMPUTE_EXPECT(((float *)result_0.buffer())[i] == ((float *)result_1.buffer())[i], + framework::LogLevel::ERRORS); + } +} + +using CpuGemmConv2dFixture = CpuGemmConv2dValidationFixture; + +TEST_SUITE(F32) +FIXTURE_DATA_TEST_CASE(SmokeTest, + CpuGemmConv2dFixture, + framework::DatasetMode::PRECOMMIT, + datasets::TinyConvolutionLayerDataset()) +{ + // Validate output + validate(Accessor(_target), _reference, rel_tolerance_f32); +} +TEST_SUITE_END() // F32 + +TEST_SUITE_END() // CpuGemmConv2d +TEST_SUITE_END() // OPERATORS +TEST_SUITE_END() // NEON +} // namespace validation +} // namespace test +} // namespace arm_compute diff --git a/tests/validation/runtime/experimental/operators/CpuGemmDirectConv2d.cpp b/tests/validation/runtime/experimental/operators/CpuGemmDirectConv2d.cpp new file mode 100644 index 0000000000..e3a16a5437 --- /dev/null +++ b/tests/validation/runtime/experimental/operators/CpuGemmDirectConv2d.cpp @@ -0,0 +1,145 @@ +/* + * Copyright (c) 2024 Arm Limited. 
+ * + * SPDX-License-Identifier: MIT + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal in the Software without restriction, including without limitation the + * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + * sell copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + */ +#include "arm_compute/runtime/experimental/operators/CpuGemmDirectConv2d.h" + +#include "arm_compute/core/CoreTypes.h" +#include "arm_compute/core/experimental/Types.h" + +#include "src/core/helpers/MemoryHelpers.h" +#include "tests/datasets/TinyConvolutionLayerDataset.h" +#include "tests/framework/Asserts.h" +#include "tests/framework/datasets/Datasets.h" +#include "tests/framework/Macros.h" +#include "tests/Globals.h" +#include "tests/NEON/Accessor.h" +#include "tests/Utils.h" +#include "tests/validation/fixtures/ConvolutionLayerFixture.h" +#include "tests/validation/fixtures/CpuGemmDirectConv2dFixture.h" +#include "tests/validation/Validation.h" +/* + * Tests for arm_compute::experimental::op::CpuGemmDirectConv2d which is a shallow wrapper for + * arm_compute::cpu::CpuGemmDirectConv2d. 
Any future testing to the functionalities of cpu::CpuGemmDirectConv2d will + * be tested in tests/validation/NEON/ConvolutionLayer.cpp given that op::CpuGemmDirectConv2d remain a shallow wrapper. +*/ + +namespace arm_compute +{ +namespace test +{ +namespace validation +{ + +namespace +{ +const RelativeTolerance rel_tolerance_f32(0.01f); +} +TEST_SUITE(NEON) +TEST_SUITE(OPERATORS) + +TEST_SUITE(CpuGemmDirectConv2d) +/** Test case for memory injection in @ref arm_compute::experimental::op::CpuGemmDirectConv2d. + * + * Configure the operator once and inject memory at run-time in multiple executions. + * + * Checks performed in order: + * - Both runs compute the same output + */ +TEST_CASE(OpCpuGemmDirectConv2dMemoryInjection, framework::DatasetMode::ALL) +{ + auto conv = std::make_unique(); + + auto src_shape = TensorShape(23U, 27U, 5U); + auto weights_shape = TensorShape(23U, 3U, 5U, 21U); + auto bias_shape = TensorShape(21U); + auto output_shape = TensorShape(11U, 25U, 21U); + + const auto src_info = TensorInfo(src_shape, 1, DataType::F32, DataLayout::NHWC); + const auto weights_info = TensorInfo(weights_shape, 1, DataType::F32, DataLayout::NHWC); + const auto biases_info = TensorInfo(bias_shape, 1, DataType::F32, DataLayout::NHWC); + auto dst_info = TensorInfo(output_shape, 1, DataType::F32, DataLayout::NHWC); + const auto conv_info = Conv2dInfo{PadStrideInfo(2, 1, 0, 0), Size2D(1, 1), ActivationLayerInfo(), false, 1}; + + conv->configure(&src_info, &weights_info, &biases_info, &dst_info, conv_info); + auto const status = conv->validate(&src_info, &weights_info, &biases_info, &dst_info, conv_info); + ARM_COMPUTE_ASSERT(status); + + // src, weights and biases are created once, outside the lambda below, and reused by both runs + auto src = create_tensor(src_info); + auto weights = create_tensor(weights_info); + auto biases = create_tensor(biases_info); + + src.allocator()->allocate(); + weights.allocator()->allocate(); + biases.allocator()->allocate(); + + ITensorPack run_pack{ + 
{TensorType::ACL_SRC_0, &src}, {TensorType::ACL_SRC_1, &weights}, {TensorType::ACL_SRC_2, &biases}}; + ITensorPack prep_pack{{TensorType::ACL_SRC_1, &weights}, {TensorType::ACL_SRC_2, &biases}}; + + auto mg = MemoryGroup{}; + auto ws = manage_workspace(conv->workspace(), mg, run_pack, prep_pack); + + auto run_conv = [&]() -> Tensor + { + auto dst = create_tensor(dst_info); + dst.allocator()->allocate(); + run_pack.add_tensor(TensorType::ACL_DST, &dst); + + library->fill_tensor_value(Accessor(src), 1.f); + library->fill_tensor_value(Accessor(weights), 2.f); + library->fill_tensor_value(Accessor(biases), 3.f); + // This operator is configured once and captured by this lambda. + conv->prepare(prep_pack); + conv->run(run_pack); + return dst; + }; + auto result_0 = run_conv(); + auto result_1 = run_conv(); + for (size_t i = 0; i < result_0.info()->tensor_shape().total_size(); ++i) + { + ARM_COMPUTE_EXPECT(((float *)result_0.buffer())[i] == ((float *)result_1.buffer())[i], + framework::LogLevel::ERRORS); + } +} + +using CpuGemmDirectConv2dFixture = + CpuGemmDirectConv2dValidationFixture; + +TEST_SUITE(F32) +FIXTURE_DATA_TEST_CASE(SmokeTest, + CpuGemmDirectConv2dFixture, + framework::DatasetMode::PRECOMMIT, + datasets::TinyConvolutionLayerDataset()) +{ + // Validate output + validate(Accessor(_target), _reference, rel_tolerance_f32); +} +TEST_SUITE_END() // F32 + +TEST_SUITE_END() // CPUGEMMDIRECTCONV2D +TEST_SUITE_END() // OPERATORS +TEST_SUITE_END() // NEON +} // namespace validation +} // namespace test +} // namespace arm_compute diff --git a/tests/validation/runtime/experimental/operators/CpuTranspose.cpp b/tests/validation/runtime/experimental/operators/CpuTranspose.cpp new file mode 100644 index 0000000000..ccc33d5668 --- /dev/null +++ b/tests/validation/runtime/experimental/operators/CpuTranspose.cpp @@ -0,0 +1,66 @@ +/* + * Copyright (c) 2024 Arm Limited. 
+ * + * SPDX-License-Identifier: MIT + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal in the Software without restriction, including without limitation the + * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + * sell copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + */ +#include "arm_compute/runtime/experimental/operators/CpuTranspose.h" +#include "tests/NEON/Accessor.h" +#include "tests/datasets/ShapeDatasets.h" +#include "tests/framework/Macros.h" +#include "tests/validation/Validation.h" +#include "tests/validation/fixtures/CpuTransposeFixture.h" + +/* + * Tests for arm_compute::experimental::op::CpuTranspose which is a shallow wrapper for + * arm_compute::cpu::CpuTranspose. Any future testing to the functionalities of cpu::CpuTranspose + * will be tested in tests/validation/NEON/Transpose.cpp given that op::CpuTranspose remain a shallow wrapper. 
+*/ + +namespace arm_compute +{ +namespace test +{ +namespace validation +{ +TEST_SUITE(NEON) +TEST_SUITE(OPERATORS) + +TEST_SUITE(CpuTranspose) + +template +using CpuTransposeFixture = CpuTransposeValidationFixture; + +TEST_SUITE(U8) +FIXTURE_DATA_TEST_CASE(SmokeTest, CpuTransposeFixture, framework::DatasetMode::PRECOMMIT, combine(concat(datasets::Small1DShapes(), datasets::Small2DShapes()), + framework::dataset::make("DataType", DataType::U8))) +{ + // Validate output + validate(Accessor(_target), _reference); +} +TEST_SUITE_END() // U8 + +TEST_SUITE_END() // CpuTranspose + +TEST_SUITE_END() // OPERATORS +TEST_SUITE_END() // NEON +} // namespace validation +} // namespace test +} // namespace arm_compute diff --git a/tests/validation/runtime/experimental/operators/CpuWinogradConv2d.cpp b/tests/validation/runtime/experimental/operators/CpuWinogradConv2d.cpp new file mode 100644 index 0000000000..dcd6aec386 --- /dev/null +++ b/tests/validation/runtime/experimental/operators/CpuWinogradConv2d.cpp @@ -0,0 +1,141 @@ +/* + * Copyright (c) 2017-2024 Arm Limited. + * + * SPDX-License-Identifier: MIT + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal in the Software without restriction, including without limitation the + * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + * sell copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 
IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + */ +#include "arm_compute/runtime/experimental/operators/CpuWinogradConv2d.h" + +#include "arm_compute/core/CoreTypes.h" +#include "arm_compute/core/experimental/Types.h" + +#include "src/core/helpers/MemoryHelpers.h" +#include "tests/datasets/SmallConvolutionLayerDataset.h" +#include "tests/framework/Asserts.h" +#include "tests/framework/datasets/Datasets.h" +#include "tests/framework/Macros.h" +#include "tests/Globals.h" +#include "tests/NEON/Accessor.h" +#include "tests/Utils.h" +#include "tests/validation/fixtures/ConvolutionLayerFixture.h" +#include "tests/validation/fixtures/CpuWinogradConv2dFixture.h" +#include "tests/validation/Validation.h" +/* + * Tests for arm_compute::experimental::op::CpuWinogradConv2d which is a shallow wrapper for + * arm_compute::cpu::CpuWinogradConv2d. Any future testing to the functionalities of cpu::CpuWinogradConv2d will + * be tested in tests/validation/NEON/ConvolutionLayer.cpp given that op::CpuWinogradConv2d remain a shallow wrapper. +*/ + +namespace arm_compute +{ +namespace test +{ +namespace validation +{ +using framework::dataset::make; +namespace +{ +const AbsoluteTolerance abs_tolerance_f32(0.002f); /**< Absolute tolerance for FP32 types */ +} // namespace + +TEST_SUITE(NEON) +TEST_SUITE(OPERATORS) +TEST_SUITE(CpuWinogradConv2d) +/** Test case for memory injection in @ref arm_compute::experimental::op::CpuWinogradConv2d. + * + * Configure the operator once and inject memory at run-time in multiple executions. 
+ * + * Checks performed in order: + * - Both runs compute the same output + */ +TEST_CASE(OpCpuWinogradConv2dMemoryInjection, framework::DatasetMode::ALL) +{ + auto winograd = std::make_unique(); + const auto src_info = TensorInfo(TensorShape(8U, 8U, 32U), 1, DataType::F32); + const auto w_info = TensorInfo(TensorShape(1U), 1, DataType::F32); + const auto b_info = TensorInfo(TensorShape(1U, 3U, 32U, 1U), 1, DataType::F32); + auto dst_info = TensorInfo(TensorShape(8U, 6U, 1U), 1, DataType::F32); + const PadStrideInfo pad_info{}; + + winograd->configure(&src_info, &b_info, &w_info, &dst_info, pad_info); + + // a, b and c (bound to ACL_SRC_0/1/2) are created once, outside the lambda below, and reused by both runs + auto a = create_tensor(src_info); + auto b = create_tensor(b_info); + auto c = create_tensor(w_info); + a.allocator()->allocate(); + b.allocator()->allocate(); + c.allocator()->allocate(); + + ITensorPack run_pack{{TensorType::ACL_SRC_0, &a}, {TensorType::ACL_SRC_1, &b}, {TensorType::ACL_SRC_2, &c}}; + ITensorPack prep_pack{{TensorType::ACL_SRC_1, &b}, {TensorType::ACL_SRC_2, &c}}; + + auto mg = MemoryGroup{}; + auto ws = manage_workspace(winograd->workspace(), mg, run_pack, prep_pack); + auto run_conv = [&]() -> Tensor + { + auto dst = create_tensor(dst_info); + dst.allocator()->allocate(); + + run_pack.add_tensor(TensorType::ACL_DST, &dst); + library->fill_tensor_value(Accessor(a), 1.f); + library->fill_tensor_value(Accessor(b), 2.f); + library->fill_tensor_value(Accessor(c), 3.f); + + // This operator is configured once and captured by this lambda. 
+ winograd->prepare(prep_pack); + winograd->run(run_pack); + return dst; + }; + + auto result_0 = run_conv(); + auto result_1 = run_conv(); + + for (size_t i = 0; i < result_0.info()->tensor_shape().total_size(); ++i) + { + ARM_COMPUTE_EXPECT(((float *)result_0.buffer())[i] == ((float *)result_1.buffer())[i], + framework::LogLevel::ERRORS); + } +} + +using CpuWinogradConv2dFixture = + CpuWinogradConv2dValidationFixture; + +const auto ActivationFunctionsDataset = + make("ActivationInfo", + {ActivationLayerInfo(), ActivationLayerInfo(ActivationLayerInfo::ActivationFunction::RELU), + ActivationLayerInfo(ActivationLayerInfo::ActivationFunction::BOUNDED_RELU, 0.5f)}); + +TEST_SUITE(F32) +FIXTURE_DATA_TEST_CASE(SmokeTest, + CpuWinogradConv2dFixture, + framework::DatasetMode::PRECOMMIT, + combine(datasets::SmallWinogradConvolutionLayer3x3Dataset(), ActivationFunctionsDataset)) +{ + // Validate output + validate(Accessor(_target), _reference, abs_tolerance_f32); +} +TEST_SUITE_END() // F32 +TEST_SUITE_END() // CpuWinogradConv2d +TEST_SUITE_END() // OPERATORS +TEST_SUITE_END() // NEON +} // namespace validation +} // namespace test +} // namespace arm_compute