From 025e09a218a4dcd55b0c5ef407af9025937828b9 Mon Sep 17 00:00:00 2001 From: George Bosilca Date: Wed, 22 May 2019 10:58:48 -0400 Subject: [PATCH] Force inlining memcpy for short data This work is based on @derbeyn patch provided on #6678. I reworked it to be more inclusive (works now with both gcc and icc) and to cover more standard size lengths (4, 8, 16). Signed-off-by: George Bosilca Signed-off-by: Nadia Derbey --- opal/datatype/opal_datatype_memcpy.h | 41 ++++++++++++++++++++++++++-- 1 file changed, 38 insertions(+), 3 deletions(-) diff --git a/opal/datatype/opal_datatype_memcpy.h b/opal/datatype/opal_datatype_memcpy.h index 972009ac96a..b217a2db7ca 100644 --- a/opal/datatype/opal_datatype_memcpy.h +++ b/opal/datatype/opal_datatype_memcpy.h @@ -1,6 +1,6 @@ /* -*- Mode: C; c-basic-offset:4 ; -*- */ /* - * Copyright (c) 2004-2009 The University of Tennessee and The University + * Copyright (c) 2004-2019 The University of Tennessee and The University * of Tennessee Research Foundation. All rights * reserved. * Copyright (c) 2009 Oak Ridge National Labs. All rights reserved. @@ -11,10 +11,45 @@ * $HEADER$ */ +#include "opal/runtime/opal.h" + #ifndef OPAL_DATATYPE_MEMCPY_H_HAS_BEEN_INCLUDED #define OPAL_DATATYPE_MEMCPY_H_HAS_BEEN_INCLUDED -#define MEMCPY( DST, SRC, BLENGTH ) \ - memcpy( (DST), (SRC), (BLENGTH) ) +#define TYPED_MEMCPY( DST, SRC, BLENGTH, ALIGN, TYPE) /* Copy as whole TYPE elements when BLENGTH >= sizeof(TYPE) and (ALIGN & (sizeof(TYPE) - 1)) is zero. DST, SRC and BLENGTH are assigned to, so they must be modifiable lvalues. The expansion contains break statements, so it is only valid inside a loop or switch, such as the do/while (0) body of MEMCPY. */ \ + if( ((BLENGTH) >= sizeof(TYPE)) && (0 == ((ALIGN) & (sizeof(TYPE) - 1))) ) { \ + TYPE *__dst = (TYPE*)(DST), *__src = (TYPE*)(SRC); /* NOTE(review): the data is read and written through TYPE* whatever its declared type - confirm this is acceptable under the strict-aliasing rules; identifiers containing a double underscore are reserved for the implementation */ \ + if( (BLENGTH) == sizeof(TYPE) ) { /* exactly one element: a single assignment, then leave the enclosing loop */ \ + *__dst = *__src; \ + break; \ + } \ + size_t _cnt = ((BLENGTH) / sizeof(TYPE)); /* number of whole TYPE elements that fit in BLENGTH */ \ + for( ; _cnt > 0; _cnt--, __dst++, __src++ ) { \ + *__dst = *__src; \ + (BLENGTH) -= sizeof(TYPE); /* after the loop BLENGTH holds the bytes not yet copied */ \ + } \ + if( 0 == (BLENGTH) ) break; /* everything was copied */ \ + (DST) = __dst; /* hand the advanced pointers back so the next copy step starts after the copied part */ \ + (SRC) = __src; \ + } + +/* + * This macro is called whenever we are packing/unpacking a DDT that + * is built with basic datatypes. 
+ * Specifying a fixed size for the memcpy() makes the intel compiler + * inline it as an assignment operation. + */ +#define MEMCPY( DST, SRC, BLENGTH ) /* Statement-like macro: copies BLENGTH bytes from SRC to DST. NOTE(review): the comment above mentions a fixed-size memcpy(), while TYPED_MEMCPY performs typed assignments - confirm which wording is intended. */ \ + do { \ + void *_dst = (void*)(DST), *_src = (void*)(SRC); /* DST and SRC are each evaluated once, into locals that the steps below may advance */ \ + size_t _blength = (size_t)(BLENGTH); /* BLENGTH is evaluated once; the local is decremented as bytes are copied */ \ + if( _blength < (size_t)opal_cache_line_size ) { /* only lengths below opal_cache_line_size take the typed-copy path */ \ + uintptr_t align = ((uintptr_t)_dst) | ((uintptr_t)_src); /* OR of both addresses: a low bit is clear only when it is clear in both pointers */ \ + TYPED_MEMCPY( _dst, _src, _blength, align, int64_t ); /* each step exits the do/while through break once no bytes remain */ \ + TYPED_MEMCPY( _dst, _src, _blength, align, int32_t ); \ + TYPED_MEMCPY( _dst, _src, _blength, align, int8_t ); \ + } \ + memcpy( _dst, _src, _blength ); /* reached only when the block above did not exit through break */ \ + } while (0) #endif /* OPAL_DATATYPE_MEMCPY_H_HAS_BEEN_INCLUDED */