From 297f66c890ae1ff3bc762e82722847cbcecdd718 Mon Sep 17 00:00:00 2001
From: Ken Steele
Date: Wed, 31 Jul 2013 15:05:04 -0400
Subject: [PATCH] Tile SIMD implementation of SCMemcmp and SCMemcmpLowercase

Based on the SSE3 implementation, it checks 8 bytes at a time.
---
 src/util-memcmp.h | 142 +++++++++++++++++++++++++++++++++++++++++++++-
 1 file changed, 140 insertions(+), 2 deletions(-)

diff --git a/src/util-memcmp.h b/src/util-memcmp.h
index 5424412c04dd..f6935a3f45f0 100644
--- a/src/util-memcmp.h
+++ b/src/util-memcmp.h
@@ -1,4 +1,4 @@
-/* Copyright (C) 2007-2010 Open Information Security Foundation
+/* Copyright (C) 2007-2013 Open Information Security Foundation
  *
  * You can copy, redistribute or modify this Program under the terms of
  * the GNU General Public License version 2 as published by the Free
@@ -20,7 +20,7 @@
  *
  * \author Victor Julien
  *
- * Memcmp implementations for SSE3, SSE4.1 and SSE4.2.
+ * Memcmp implementations for SSE3, SSE4.1, SSE4.2 and TILE-Gx SIMD.
  *
  * Both SCMemcmp and SCMemcmpLowercase return 0 on a exact match,
  * 1 on a failed match.
@@ -342,6 +342,144 @@ static inline int SCMemcmpLowercase(void *s1, void *s2, size_t len) {
     return 0;
 }
 
+#elif defined(__tile__)
+
+#include <ctype.h>
+
+/** \brief Compare two buffers for equality, 8 bytes at a time.
+ *
+ * Neither buffer needs to be aligned: every 8-byte chunk is assembled
+ * from two aligned loads, and the final 1 to 8 bytes are compared one
+ * byte at a time so nothing is read beyond the word holding the last
+ * byte of a buffer.
+ *
+ * \param s1 first buffer
+ * \param s2 second buffer
+ * \param len number of bytes to compare
+ * \retval 0 the buffers match (or len is 0)
+ * \retval 1 the buffers differ
+ */
+static inline int SCMemcmp(void *s1, void *s2, size_t len)
+{
+    uint64_t b1, w1, aligned1;
+    uint64_t b2, w2, aligned2;
+
+    if (len == 0)
+        return 0;
+
+    /* Load aligned words containing the beginning of each string.
+     * These loads don't trigger unaligned events.
+     */
+    w1 = __insn_ldna(s1);
+    w2 = __insn_ldna(s2);
+    /* Can't just read next 8 bytes because it might go past the end
+     * of a page. */
+    while (len > 8) {
+        /* Here, the buffer extends into the next word by at least one
+         * byte, so it is safe to read the next word. Do aligned
+         * loads on the next word. Then use the two words to create
+         * an aligned word from each string. */
+        b1 = __insn_ldna(s1 + 8);
+        b2 = __insn_ldna(s2 + 8);
+        aligned1 = __insn_dblalign(w1, b1, s1);
+        aligned2 = __insn_dblalign(w2, b2, s2);
+        if (aligned1 != aligned2)
+            return 1;
+
+        /* Move forward one word (8 bytes) */
+        w1 = b1;
+        w2 = b2;
+        len -= 8;
+        s1 += 8;
+        s2 += 8;
+    }
+    /* Process the last up-to 8 bytes. */
+    do {
+        if (*(char*)s1 != *(char*)s2)
+            return 1;
+        s1++;
+        s2++;
+        len--;
+    } while (len);
+
+    return 0;
+}
+
+/** \brief Convert 8 characters to lower case using SIMD.
+ * \param cc Word containing the 8 bytes.
+ * \return Word containing 8-bytes each converted to lowercase.
+ */
+static inline uint64_t
+vec_tolower(uint64_t cc)
+{
+    /* For uppercase letters, add 32 to convert to lower case. A byte
+     * is uppercase when it is below 'Z' + 1 but not below 'A', i.e.
+     * when the two per-byte comparisons disagree. */
+    uint64_t less_than_eq_Z = __insn_v1cmpltui (cc, 'Z' + 1);
+    uint64_t less_than_A = __insn_v1cmpltui (cc, 'A');
+    uint64_t is_upper = __insn_v1cmpne (less_than_eq_Z, less_than_A);
+    /* Shifting left by 5 turns each set flag into the 32 to add. */
+    return __insn_v1add (cc,__insn_v1shli (is_upper, 5));
+}
+
+/** \brief compare two buffers in a case insensitive way
+ * \param s1 buffer already in lowercase
+ * \param s2 buffer with mixed upper and lowercase
+ * \param len number of bytes to compare
+ * \retval 0 s1 matches the lowercased s2 (or len is 0)
+ * \retval 1 the buffers differ
+ */
+static inline int SCMemcmpLowercase(void *s1, void *s2, size_t len)
+{
+    uint64_t b1, w1, aligned1;
+    uint64_t b2, w2, aligned2;
+
+    if (len == 0)
+        return 0;
+
+    /* TODO Check for already aligned cases. To optimize. */
+
+    /* Load word containing the beginning of each string.
+     * These loads don't trigger unaligned events.
+     */
+    w1 = __insn_ldna(s1);
+    w2 = __insn_ldna(s2);
+    /* Can't just read next 8 bytes because it might go past the end
+     * of a page. */
+    while (len > 8) {
+        /* Here, the buffer extends into the next word by at least one
+         * byte, so it is safe to read the next word. Do aligned
+         * loads on next word. Then use the two words to create an
+         * aligned word from each string. */
+        b1 = __insn_ldna(s1 + 8);
+        b2 = __insn_ldna(s2 + 8);
+        aligned1 = __insn_dblalign(w1, b1, s1);
+        aligned2 = vec_tolower(__insn_dblalign(w2, b2, s2));
+        if (aligned1 != aligned2)
+            return 1;
+
+        /* Move forward one word (8 bytes) */
+        w1 = b1;
+        w2 = b2;
+        len -= 8;
+        s1 += 8;
+        s2 += 8;
+    }
+
+    /* Process the last up-to 8 bytes. Read them as unsigned char:
+     * tolower() is only defined for EOF and values representable as
+     * unsigned char; where plain char is signed, bytes >= 0x80 would
+     * otherwise be passed as negative values. */
+    do {
+        if (*(unsigned char *)s1 != tolower(*(unsigned char *)s2))
+            return 1;
+        s1++;
+        s2++;
+        len--;
+    } while (len);
+
+    return 0;
+}
+
 #else
 
 /* No SIMD support, fall back to plain memcmp and a home grown lowercase one */