Skip to content

Commit

Permalink
Use Tilera SIMD for signature matching, à la SSE3
Browse files Browse the repository at this point in the history
Makes use of 8-wide byte compare instructions in signature matching.

For allocating aligned memory, _mm_malloc() is SSE-only, so a check for
__tile__ was added to use memalign() instead.

Shows a 13% speed up.
  • Loading branch information
ken-tilera authored and regit committed Aug 1, 2013
1 parent 2b71f3b commit 1e501b6
Show file tree
Hide file tree
Showing 4 changed files with 89 additions and 27 deletions.
10 changes: 5 additions & 5 deletions src/detect-engine-siggroup.c
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
/* Copyright (C) 2007-2010 Open Information Security Foundation
/* Copyright (C) 2007-2013 Open Information Security Foundation
*
* You can copy, redistribute or modify this Program under the terms of
* the GNU General Public License version 2 as published by the Free
Expand Down Expand Up @@ -175,7 +175,7 @@ void SigGroupHeadFree(SigGroupHead *sgh)

PatternMatchDestroyGroup(sgh);

#if defined(__SSE3__)
#if defined(__SSE3__) || defined(__tile__)
if (sgh->mask_array != NULL) {
/* mask is aligned */
SCFreeAligned(sgh->mask_array);
Expand Down Expand Up @@ -1690,7 +1690,7 @@ int SigGroupHeadBuildHeadArray(DetectEngineCtx *de_ctx, SigGroupHead *sgh)
return 0;

BUG_ON(sgh->head_array != NULL);
#if defined(__SSE3__)
#if defined(__SSE3__) || defined(__tile__)
BUG_ON(sgh->mask_array != NULL);

/* mask array is 16 byte aligned for SIMD checking, also we always
Expand All @@ -1706,7 +1706,7 @@ int SigGroupHeadBuildHeadArray(DetectEngineCtx *de_ctx, SigGroupHead *sgh)
}
#endif /* __WORDSIZE */

sgh->mask_array = SCMallocAligned((cnt * sizeof(SignatureMask)), 16);
sgh->mask_array = (SignatureMask *)SCMallocAligned((cnt * sizeof(SignatureMask)), 16);
if (sgh->mask_array == NULL)
return -1;

Expand All @@ -1732,7 +1732,7 @@ int SigGroupHeadBuildHeadArray(DetectEngineCtx *de_ctx, SigGroupHead *sgh)
sgh->head_array[idx].hdr_copy3 = s->hdr_copy3;
sgh->head_array[idx].full_sig = s;

#if defined(__SSE3__)
#if defined(__SSE3__) || defined(__tile__)
sgh->mask_array[idx] = s->mask;
#endif
idx++;
Expand Down
92 changes: 73 additions & 19 deletions src/detect.c
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
/* Copyright (C) 2007-2011 Open Information Security Foundation
/* Copyright (C) 2007-2013 Open Information Security Foundation
*
* You can copy, redistribute or modify this Program under the terms of
* the GNU General Public License version 2 as published by the Free
Expand Down Expand Up @@ -576,7 +576,7 @@ static inline int SigMatchSignaturesBuildMatchArrayAddSignature(DetectEngineThre
* On 64 bit systems we inspect in 64 sig batches, creating a u64 with flags.
* The size of a register is leading here.
*/
static inline void SigMatchSignaturesBuildMatchArraySIMD(DetectEngineThreadCtx *det_ctx,
static inline void SigMatchSignaturesBuildMatchArray(DetectEngineThreadCtx *det_ctx,
Packet *p, SignatureMask mask, uint16_t alproto)
{
uint32_t u;
Expand Down Expand Up @@ -712,28 +712,70 @@ static inline void SigMatchSignaturesBuildMatchArraySIMD(DetectEngineThreadCtx *
#error Wordsize (__WORDSIZE) neither 32 or 64.
#endif
}
#endif /* defined(__SSE3__) */
/* end defined(__SSE3__) */
#elif defined(__tile__)

static inline void SigMatchSignaturesBuildMatchArrayNoSIMD(DetectEngineThreadCtx *det_ctx,
/**
* \brief SIMD implementation of mask prefiltering for TILE-Gx
*
* Mass mask matching is done by creating a bitmap of signatures that need
* further inspection.
*/
static inline void SigMatchSignaturesBuildMatchArray(DetectEngineThreadCtx *det_ctx,
Packet *p, SignatureMask mask, uint16_t alproto)
{
uint32_t u;
register uint64_t bm; /* bit mask, 64 bits used */

/* reset previous run */
det_ctx->match_array_cnt = 0;
/* Keep local copies of variables that don't change during this function. */
uint64_t *mask_vector = (uint64_t*)det_ctx->sgh->mask_array;
uint32_t sig_cnt = det_ctx->sgh->sig_cnt;
SignatureHeader *head_array = det_ctx->sgh->head_array;

Signature **match_array = det_ctx->match_array;
uint32_t match_count = 0;

/* Replicate the packet mask into each byte of the vector. */
uint64_t pm = __insn_shufflebytes(mask, 0, 0);

/* u is the signature index. */
for (u = 0; u < sig_cnt; u += 8) {
/* Load 8 masks */
uint64_t sm = *mask_vector++;
/* Binary AND 8 masks with the packet's mask */
uint64_t r1 = pm & sm;
/* Compare the result with the original mask
* Result if equal puts a 1 in LSB of bytes that match.
*/
bm = __insn_v1cmpeq(sm, r1);

/* Check the LSB bit of each byte in the bit map. Little endian is assumed,
* so the LSB byte is index 0. Uses count trailing zeros to find least
* significant bit that is set. */
while (bm) {
/* Find first bit set starting from LSB. */
unsigned int first_bit = __insn_ctz(bm);
unsigned int first_byte = first_bit >> 3;
unsigned int x = u + first_byte;
if (x >= sig_cnt)
break;
SignatureHeader *s = &head_array[x];

/* Clear the first bit set, so it is not found again. */
bm -= (1UL << first_bit);

for (u = 0; u < det_ctx->sgh->sig_cnt; u++) {
SignatureHeader *s = &det_ctx->sgh->head_array[u];
if ((mask & s->mask) == s->mask) {
if (SigMatchSignaturesBuildMatchArrayAddSignature(det_ctx, p, s, alproto) == 1) {
/* okay, store it */
det_ctx->match_array[det_ctx->match_array_cnt] = s->full_sig;
det_ctx->match_array_cnt++;
*match_array++ = s->full_sig;
match_count++;
}
}
}
det_ctx->match_array_cnt = match_count;
}

/* end defined(__tile__) */
#else
/* No SIMD implementation */
/**
* \brief build an array of signatures that will be inspected
*
Expand All @@ -745,15 +787,27 @@ static inline void SigMatchSignaturesBuildMatchArrayNoSIMD(DetectEngineThreadCtx
* \param mask Packets mask
* \param alproto application layer protocol
*/
static void SigMatchSignaturesBuildMatchArray(DetectEngineThreadCtx *det_ctx,
Packet *p, SignatureMask mask, uint16_t alproto)
static inline void SigMatchSignaturesBuildMatchArray(DetectEngineThreadCtx *det_ctx,
Packet *p, SignatureMask mask,
uint16_t alproto)
{
#if defined(__SSE3__)
SigMatchSignaturesBuildMatchArraySIMD(det_ctx, p, mask, alproto);
#else
SigMatchSignaturesBuildMatchArrayNoSIMD(det_ctx, p, mask, alproto);
#endif
uint32_t u;

/* reset previous run */
det_ctx->match_array_cnt = 0;

for (u = 0; u < det_ctx->sgh->sig_cnt; u++) {
SignatureHeader *s = &det_ctx->sgh->head_array[u];
if ((mask & s->mask) == s->mask) {
if (SigMatchSignaturesBuildMatchArrayAddSignature(det_ctx, p, s, alproto) == 1) {
/* okay, store it */
det_ctx->match_array[det_ctx->match_array_cnt] = s->full_sig;
det_ctx->match_array_cnt++;
}
}
}
}
#endif /* No SIMD implementation */

int SigMatchSignaturesRunPostMatch(ThreadVars *tv,
DetectEngineCtx *de_ctx, DetectEngineThreadCtx *det_ctx, Packet *p,
Expand Down
4 changes: 2 additions & 2 deletions src/detect.h
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
/* Copyright (C) 2007-2011 Open Information Security Foundation
/* Copyright (C) 2007-2013 Open Information Security Foundation
*
* You can copy, redistribute or modify this Program under the terms of
* the GNU General Public License version 2 as published by the Free
Expand Down Expand Up @@ -949,7 +949,7 @@ typedef struct SigGroupHead_ {

/** array of masks, used to check multiple masks against
* a packet using SIMD. */
#if defined(__SSE3__)
#if defined(__SSE3__) || defined(__tile__)
SignatureMask *mask_array;
#endif
/** chunk of memory containing the "header" part of each
Expand Down
10 changes: 9 additions & 1 deletion src/util-mem.h
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
/* Copyright (C) 2007-2010 Open Information Security Foundation
/* Copyright (C) 2007-2013 Open Information Security Foundation
*
* You can copy, redistribute or modify this Program under the terms of
* the GNU General Public License version 2 as published by the Free
Expand Down Expand Up @@ -36,6 +36,14 @@
#include "mm_malloc.h"
#endif

#if defined(__tile__)
/* Need to define _mm_ function alternatives, since these are SSE only.
 *
 * _mm_malloc(a, b) is mapped onto memalign(b, a): the two arguments are
 * passed through in swapped order. _mm_free(a) is mapped onto plain free(a).
 * NOTE(review): assumes callers pass (size, alignment) as with the SSE
 * _mm_malloc(), which matches memalign()'s (alignment, size) after the
 * swap -- confirm against the SCMallocAligned() wrapper.
*/
#include <malloc.h>
#define _mm_malloc(a,b) memalign((b),(a))
#define _mm_free(a) free((a))
#endif /* defined(__tile__) */

SC_ATOMIC_EXTERN(unsigned int, engine_stage);

/* Use this only if you want to debug memory allocation and free()
Expand Down

0 comments on commit 1e501b6

Please sign in to comment.