Skip to content

Commit

Permalink
Merge pull request #9 from Vagabond/adt/serialization
Browse files Browse the repository at this point in the history
Add to_bin and from_bin serialization/deserialization
  • Loading branch information
mpope9 authored Feb 24, 2020
2 parents 85814a4 + a251a80 commit 74f36f5
Show file tree
Hide file tree
Showing 8 changed files with 385 additions and 65 deletions.
169 changes: 168 additions & 1 deletion c_src/xor_filter_nif.c
Original file line number Diff line number Diff line change
Expand Up @@ -9,6 +9,27 @@
static ErlNifResourceType* xor8_resource_type;
static ErlNifResourceType* xor16_resource_type;

// portable encoding/decoding helpers
/*
 * Decode a 64-bit unsigned integer stored as eight little-endian bytes.
 *
 * dst: receives the decoded value.
 * src: points at the eight encoded bytes; src[0] is the least significant.
 *
 * The result does not depend on the byte order of the host.
 */
void
unpack_le_u64(uint64_t * dst, uint8_t const * src) {
    uint64_t value = 0;

    /* Start at the most significant byte (src[7]) and shift the
     * accumulator up by one byte for every byte consumed. */
    for (int idx = 7; idx >= 0; idx--) {
        value = (value << 8) | (uint64_t)src[idx];
    }

    *dst = value;
}

/*
 * Encode a 64-bit unsigned integer as eight little-endian bytes.
 *
 * dst: receives the eight encoded bytes; dst[0] gets the least
 *      significant byte of val.
 * val: the value to encode.
 *
 * The output does not depend on the byte order of the host.
 */
void
pack_le_u64(uint8_t * dst, uint64_t val) {
    uint64_t remaining = val;

    /* Emit the low byte, then drop it, eight times over. */
    for (unsigned int idx = 0; idx < 8; idx++) {
        dst[idx] = (uint8_t)(remaining & 0xff);
        remaining >>= 8;
    }
}

void
destroy_xor8_filter_resource(ErlNifEnv* env, void* obj)
{
Expand Down Expand Up @@ -208,6 +229,76 @@ xor8_contain_nif(ErlNifEnv* env, int argc, const ERL_NIF_TERM argv[])
return mk_atom(env, "false");
}

/*
 * NIF: serialize an xor8 filter resource to a binary.
 *
 * argv[0] must be an xor8 filter resource.
 *
 * Layout of the produced binary:
 *   bytes 0..7    seed, little-endian u64
 *   bytes 8..15   blockLength, little-endian u64
 *   bytes 16..    blockLength * 3 one-byte fingerprints
 *
 * Returns the binary term, badarg on a wrong argument count, or the
 * result of mk_error() when the resource cannot be fetched or the
 * binary cannot be allocated.
 */
static ERL_NIF_TERM
xor8_to_bin_nif(ErlNifEnv* env, int argc, const ERL_NIF_TERM argv[])
{
    if (argc != 1) {
        return enif_make_badarg(env);
    }

    xor8_t* filter;
    if (!enif_get_resource(env, argv[0], xor8_resource_type, (void**) &filter)) {
        return mk_error(env, "get_filter_for_to_bin");
    }

    /* Fixed header (seed + blockLength) followed by the fingerprints. */
    const size_t header_len = 2 * sizeof(uint64_t);
    const size_t payload_len = filter->blockLength * 3;

    ErlNifBinary bin;
    if (!enif_alloc_binary(header_len + payload_len, &bin)) {
        return mk_error(env, "allocate_binary_for_to_bin");
    }

    uint8_t* cursor = bin.data;
    pack_le_u64(cursor, filter->seed);
    cursor += sizeof(uint64_t);
    pack_le_u64(cursor, filter->blockLength);
    cursor += sizeof(uint64_t);
    memcpy(cursor, filter->fingerprints, payload_len);

    return enif_make_binary(env, &bin);
}

/*
 * NIF: rebuild an xor8 filter resource from a binary produced by
 * xor8_to_bin_nif.
 *
 * argv[0] must be a binary laid out as:
 *   bytes 0..7    seed, little-endian u64
 *   bytes 8..15   blockLength, little-endian u64
 *   bytes 16..    blockLength * 3 one-byte fingerprints
 *
 * Returns the new filter resource term. Returns badarg when the argument
 * count is wrong, the argument is not a binary, the binary is shorter
 * than the header, the declared blockLength does not match the payload
 * size exactly, or memory cannot be allocated.
 */
static ERL_NIF_TERM
xor8_from_bin_nif(ErlNifEnv* env, int argc, const ERL_NIF_TERM argv[])
{
    if (argc != 1) {
        return enif_make_badarg(env);
    }

    ErlNifBinary bin;
    if (!enif_inspect_binary(env, argv[0], &bin)) {
        return enif_make_badarg(env);
    }

    const size_t header_len = sizeof(uint64_t) * 2;
    if (bin.size < header_len) {
        return enif_make_badarg(env);
    }

    uint64_t seed;
    uint64_t block_length;
    unpack_le_u64(&seed, bin.data);
    unpack_le_u64(&block_length, bin.data + sizeof(uint64_t));

    /* Validate the untrusted blockLength by dividing the payload size
     * rather than multiplying blockLength: blockLength * 3 can wrap
     * around 2^64 and make an oversized blockLength look consistent with
     * a short binary. */
    const size_t payload_len = bin.size - header_len;
    if (payload_len % 3 != 0 || block_length != (uint64_t)(payload_len / 3)) {
        return enif_make_badarg(env);
    }

    xor8_t* filter =
        enif_alloc_resource(xor8_resource_type, sizeof(xor8_t));
    if (filter == NULL) {
        return enif_make_badarg(env);
    }

    /* Fully initialise the resource before anything can release it, so
     * the resource destructor never sees an indeterminate pointer. */
    filter->seed = seed;
    filter->blockLength = block_length;
    filter->fingerprints = enif_alloc(payload_len);

    if (filter->fingerprints == NULL && payload_len > 0) {
        /* NOTE(review): assumes the xor8 resource destructor tolerates a
         * NULL fingerprints pointer - confirm. */
        enif_release_resource(filter);
        return enif_make_badarg(env);
    }

    if (payload_len > 0) {
        memcpy(filter->fingerprints, bin.data + header_len, payload_len);
    }

    ERL_NIF_TERM res = enif_make_resource(env, filter);
    /* Drop our reference: the term now keeps the resource alive. */
    enif_release_resource(filter);
    return res;
}


/* Begin xor16 nif code */
static ERL_NIF_TERM
xor16_initialize(ErlNifEnv* env, int argc, const ERL_NIF_TERM argv[], int buffered)
Expand Down Expand Up @@ -329,6 +420,77 @@ xor16_contain_nif(ErlNifEnv* env, int argc, const ERL_NIF_TERM argv[])
return mk_atom(env, "false");
}

/*
 * NIF: serialize an xor16 filter resource to a binary.
 *
 * argv[0] must be an xor16 filter resource.
 *
 * Layout of the produced binary:
 *   bytes 0..7    seed, little-endian u64
 *   bytes 8..15   blockLength, little-endian u64
 *   bytes 16..    blockLength * 3 two-byte fingerprints, copied as they
 *                 sit in memory
 *
 * Returns the binary term, badarg on a wrong argument count, or the
 * result of mk_error() when the resource cannot be fetched or the
 * binary cannot be allocated.
 */
static ERL_NIF_TERM
xor16_to_bin_nif(ErlNifEnv* env, int argc, const ERL_NIF_TERM argv[])
{
    if (argc != 1) {
        return enif_make_badarg(env);
    }

    xor16_t* filter;
    if (!enif_get_resource(env, argv[0], xor16_resource_type, (void**) &filter)) {
        return mk_error(env, "get_filter_for_to_bin");
    }

    /* Fixed header (seed + blockLength) followed by the fingerprints. */
    const size_t header_len = 2 * sizeof(uint64_t);
    const size_t payload_len = filter->blockLength * sizeof(uint16_t) * 3;

    ErlNifBinary bin;
    if (!enif_alloc_binary(header_len + payload_len, &bin)) {
        return mk_error(env, "allocate_binary_for_to_bin");
    }

    uint8_t* cursor = bin.data;
    pack_le_u64(cursor, filter->seed);
    cursor += sizeof(uint64_t);
    pack_le_u64(cursor, filter->blockLength);
    cursor += sizeof(uint64_t);
    // TODO endianness: the 16-bit fingerprints are copied in host byte
    // order, unlike the two header fields above.
    memcpy(cursor, filter->fingerprints, payload_len);

    return enif_make_binary(env, &bin);
}

/*
 * NIF: rebuild an xor16 filter resource from a binary produced by
 * xor16_to_bin_nif.
 *
 * argv[0] must be a binary laid out as:
 *   bytes 0..7    seed, little-endian u64
 *   bytes 8..15   blockLength, little-endian u64
 *   bytes 16..    blockLength * 3 two-byte fingerprints
 *
 * Returns the new filter resource term. Returns badarg when the argument
 * count is wrong, the argument is not a binary, the binary is shorter
 * than the header, the declared blockLength does not match the payload
 * size exactly, or memory cannot be allocated.
 */
static ERL_NIF_TERM
xor16_from_bin_nif(ErlNifEnv* env, int argc, const ERL_NIF_TERM argv[])
{
    if (argc != 1) {
        return enif_make_badarg(env);
    }

    ErlNifBinary bin;
    if (!enif_inspect_binary(env, argv[0], &bin)) {
        return enif_make_badarg(env);
    }

    const size_t header_len = sizeof(uint64_t) * 2;
    if (bin.size < header_len) {
        return enif_make_badarg(env);
    }

    uint64_t seed;
    uint64_t block_length;
    unpack_le_u64(&seed, bin.data);
    unpack_le_u64(&block_length, bin.data + sizeof(uint64_t));

    /* Each block index owns three 16-bit fingerprints. Validate the
     * untrusted blockLength by dividing the payload size rather than
     * multiplying blockLength: the product can wrap around 2^64 and make
     * an oversized blockLength look consistent with a short binary. */
    const size_t bytes_per_block = sizeof(uint16_t) * 3;
    const size_t payload_len = bin.size - header_len;
    if (payload_len % bytes_per_block != 0
        || block_length != (uint64_t)(payload_len / bytes_per_block)) {
        return enif_make_badarg(env);
    }

    xor16_t* filter =
        enif_alloc_resource(xor16_resource_type, sizeof(xor16_t));
    if (filter == NULL) {
        return enif_make_badarg(env);
    }

    /* Fully initialise the resource before anything can release it, so
     * the resource destructor never sees an indeterminate pointer. */
    filter->seed = seed;
    filter->blockLength = block_length;
    filter->fingerprints = enif_alloc(payload_len);

    if (filter->fingerprints == NULL && payload_len > 0) {
        /* NOTE(review): assumes the xor16 resource destructor tolerates a
         * NULL fingerprints pointer - confirm. */
        enif_release_resource(filter);
        return enif_make_badarg(env);
    }

    // TODO endianness: the 16-bit fingerprints are copied in host byte
    // order, unlike the two header fields above.
    if (payload_len > 0) {
        memcpy(filter->fingerprints, bin.data + header_len, payload_len);
    }

    ERL_NIF_TERM res = enif_make_resource(env, filter);
    /* Drop our reference: the term now keeps the resource alive. */
    enif_release_resource(filter);
    return res;
}

static int
nif_load(ErlNifEnv* env, void** priv_data, ERL_NIF_TERM load_info)
{
Expand All @@ -345,6 +507,8 @@ static ErlNifFunc nif_funcs[] = {
{"xor8_buffered_initialize_nif_dirty", 1,
xor8_buffered_initialize_nif, ERL_NIF_DIRTY_JOB_CPU_BOUND},
{"xor8_contain_nif", 2, xor8_contain_nif},
{"xor8_to_bin_nif", 1, xor8_to_bin_nif},
{"xor8_from_bin_nif", 1, xor8_from_bin_nif},

{"xor16_initialize_nif", 1, xor16_initialize_nif},
{"xor16_initialize_nif_dirty", 1, xor16_initialize_nif,
Expand All @@ -353,7 +517,10 @@ static ErlNifFunc nif_funcs[] = {
xor16_buffered_initialize_nif},
{"xor16_buffered_initialize_nif_dirty", 1,
xor16_buffered_initialize_nif, ERL_NIF_DIRTY_JOB_CPU_BOUND},
{"xor16_contain_nif", 2, xor16_contain_nif}
{"xor16_contain_nif", 2, xor16_contain_nif},
{"xor16_to_bin_nif", 1, xor16_to_bin_nif},
{"xor16_from_bin_nif", 1, xor16_from_bin_nif},

};

ERL_NIF_INIT(exor_filter, nif_funcs, nif_load, NULL, NULL, NULL);
68 changes: 35 additions & 33 deletions doc/exor_filter.html
Original file line number Diff line number Diff line change
Expand Up @@ -10,7 +10,7 @@
<hr>

<h1>Module exor_filter</h1>
<ul class="index"><li><a href="#description">Description</a></li><li><a href="#index">Function Index</a></li><li><a href="#functions">Function Details</a></li></ul>Nif wrapper for the xor_filter:
<ul class="index"><li><a href="#description">Description</a></li><li><a href="#types">Data Types</a></li><li><a href="#index">Function Index</a></li><li><a href="#functions">Function Details</a></li></ul>Nif wrapper for the xor_filter:
https://github.com/FastFilter/xor_singleheader.
<p>Copyright © (C) 2019, Matthew Pope</p>

Expand Down Expand Up @@ -54,6 +54,12 @@ <h2><a name="description">Description</a></h2><p>Nif wrapper for the xor_filter:
This can be faster. See xor8_buffered/1 for more information.</p>

Convenience modules <code>xor8</code> and <code>xor16</code> are provided.
<h2><a name="types">Data Types</a></h2>

<h3 class="typedecl"><a name="type-hash_function">hash_function()</a></h3>
<p><tt>hash_function() = default_hash | none | fun((any()) -&gt; non_neg_integer())</tt></p>


<h2><a name="index">Function Index</a></h2>
<table width="100%" border="1" cellspacing="0" cellpadding="2" summary="function index"><tr><td valign="top"><a href="#xor16-1">xor16/1</a></td><td>See the xor8/2 documentation.</td></tr>
<tr><td valign="top"><a href="#xor16-2">xor16/2</a></td><td>Initializes the xor filter, and runs the specified pre-defined
Expand All @@ -64,9 +70,8 @@ <h2><a name="index">Function Index</a></h2>
that are over 100,000,000 keys.</td></tr>
<tr><td valign="top"><a href="#xor16_contain-2">xor16_contain/2</a></td><td>Tests to see if the passed argument is in the filter.</td></tr>
<tr><td valign="top"><a href="#xor16_contain-3">xor16_contain/3</a></td><td>Tests to see if the passed argument is in the filter.</td></tr>
<tr><td valign="top"><a href="#xor16_from_bin-1">xor16_from_bin/1</a></td><td></td></tr>
<tr><td valign="top"><a href="#xor16_from_bin-2">xor16_from_bin/2</a></td><td></td></tr>
<tr><td valign="top"><a href="#xor16_to_bin-1">xor16_to_bin/1</a></td><td></td></tr>
<tr><td valign="top"><a href="#xor16_from_bin-1">xor16_from_bin/1</a></td><td>Deserialize the filter from a previous <code>xor16_to_bin</code> call.</td></tr>
<tr><td valign="top"><a href="#xor16_to_bin-1">xor16_to_bin/1</a></td><td>Serialize the filter to a binary.</td></tr>
<tr><td valign="top"><a href="#xor8-1">xor8/1</a></td><td>Initializes the xor filter, and runs the default hash function on
each of the elements in the list.</td></tr>
<tr><td valign="top"><a href="#xor8-2">xor8/2</a></td><td>Initializes the xor filter, and runs the specified pre-defined
Expand All @@ -77,9 +82,8 @@ <h2><a name="index">Function Index</a></h2>
that are over 100,000,000 keys.</td></tr>
<tr><td valign="top"><a href="#xor8_contain-2">xor8_contain/2</a></td><td>Tests to see if the passed argument is in the filter.</td></tr>
<tr><td valign="top"><a href="#xor8_contain-3">xor8_contain/3</a></td><td>Tests to see if the passed argument is in the filter.</td></tr>
<tr><td valign="top"><a href="#xor8_from_bin-1">xor8_from_bin/1</a></td><td></td></tr>
<tr><td valign="top"><a href="#xor8_from_bin-2">xor8_from_bin/2</a></td><td></td></tr>
<tr><td valign="top"><a href="#xor8_to_bin-1">xor8_to_bin/1</a></td><td></td></tr>
<tr><td valign="top"><a href="#xor8_from_bin-1">xor8_from_bin/1</a></td><td>Deserialize the filter from a previous <code>xor8_to_bin</code> call.</td></tr>
<tr><td valign="top"><a href="#xor8_to_bin-1">xor8_to_bin/1</a></td><td>Serialize the filter to a binary.</td></tr>
</table>

<h2><a name="functions">Function Details</a></h2>
Expand All @@ -91,7 +95,7 @@ <h3 class="function"><a name="xor16-1">xor16/1</a></h3>

<h3 class="function"><a name="xor16-2">xor16/2</a></h3>
<div class="spec">
<p><tt>xor16(List::list(), HashFunction::atom() | function()) -&gt; {reference(), atom() | function()} | {error, atom()}</tt><br></p>
<p><tt>xor16(List::list(), HashFunction::<a href="#type-hash_function">hash_function()</a>) -&gt; {reference(), <a href="#type-hash_function">hash_function()</a>} | {error, atom()}</tt><br></p>
</div><p><p>Initializes the xor filter, and runs the specified pre-defined
hash function on each of the elements.</p>

Expand All @@ -105,7 +109,7 @@ <h3 class="function"><a name="xor16_buffered-1">xor16_buffered/1</a></h3>

<h3 class="function"><a name="xor16_buffered-2">xor16_buffered/2</a></h3>
<div class="spec">
<p><tt>xor16_buffered(List::list(), HashFunction::atom() | function()) -&gt; {reference(), atom() | function()} | {error, atom()}</tt><br></p>
<p><tt>xor16_buffered(List::list(), HashFunction::<a href="#type-hash_function">hash_function()</a>) -&gt; {reference(), <a href="#type-hash_function">hash_function()</a>} | {error, atom()}</tt><br></p>
</div><p><p>Similar to the initialize function, but is a buffered version for lists
that are over 100,000,000 keys. Use for greater speed.</p>

Expand All @@ -115,7 +119,7 @@ <h3 class="function"><a name="xor16_buffered-2">xor16_buffered/2</a></h3>

<h3 class="function"><a name="xor16_contain-2">xor16_contain/2</a></h3>
<div class="spec">
<p><tt>xor16_contain(X1::{reference(), atom() | function()}, Key::term()) -&gt; true | false</tt><br></p>
<p><tt>xor16_contain(X1::{reference(), <a href="#type-hash_function">hash_function()</a>}, Key::term()) -&gt; true | false</tt><br></p>
</div><p><p>Tests to see if the passed argument is in the filter. The first
argument must be the pre-initialized filter.</p>

Expand All @@ -127,7 +131,7 @@ <h3 class="function"><a name="xor16_contain-2">xor16_contain/2</a></h3>

<h3 class="function"><a name="xor16_contain-3">xor16_contain/3</a></h3>
<div class="spec">
<p><tt>xor16_contain(X1::{reference(), atom() | function()}, Key::term(), ReturnValue::any()) -&gt; true | any()</tt><br></p>
<p><tt>xor16_contain(X1::{reference(), <a href="#type-hash_function">hash_function()</a>}, Key::term(), ReturnValue::any()) -&gt; true | any()</tt><br></p>
</div><p><p>Tests to see if the passed argument is in the filter. The first
argument must be the pre-initialized filter.</p>

Expand All @@ -137,18 +141,17 @@ <h3 class="function"><a name="xor16_contain-3">xor16_contain/3</a></h3>

<h3 class="function"><a name="xor16_from_bin-1">xor16_from_bin/1</a></h3>
<div class="spec">
<p><tt>xor16_from_bin(Bin) -&gt; any()</tt></p>
</div>

<h3 class="function"><a name="xor16_from_bin-2">xor16_from_bin/2</a></h3>
<div class="spec">
<p><tt>xor16_from_bin(Bin, Hash) -&gt; any()</tt></p>
</div>
<p><tt>xor16_from_bin(X1::{binary(), <a href="#type-hash_function">hash_function()</a>}) -&gt; {reference(), <a href="#type-hash_function">hash_function()</a>}</tt><br></p>
</div><p><p>Deserialize the filter from a previous <code>xor16_to_bin</code> call.</p>

Returns <code>{reference(), hash_function()}</code></p>

<h3 class="function"><a name="xor16_to_bin-1">xor16_to_bin/1</a></h3>
<div class="spec">
<p><tt>xor16_to_bin(X1) -&gt; any()</tt></p>
</div>
<p><tt>xor16_to_bin(X1::{reference(), <a href="#type-hash_function">hash_function()</a>}) -&gt; {binary(), <a href="#type-hash_function">hash_function()</a>}</tt><br></p>
</div><p><p>Serialize the filter to a binary</p>

Returns <code>{binary(), hash_function()}</code>.</p>

<h3 class="function"><a name="xor8-1">xor8/1</a></h3>
<div class="spec">
Expand All @@ -160,7 +163,7 @@ <h3 class="function"><a name="xor8-1">xor8/1</a></h3>

<h3 class="function"><a name="xor8-2">xor8/2</a></h3>
<div class="spec">
<p><tt>xor8(List::list(), HashFunction::atom() | function()) -&gt; {reference(), atom() | function()} | {error, atom()}</tt><br></p>
<p><tt>xor8(List::list(), HashFunction::<a href="#type-hash_function">hash_function()</a>) -&gt; {reference(), <a href="#type-hash_function">hash_function()</a>} | {error, atom()}</tt><br></p>
</div><p><p>Initializes the xor filter, and runs the specified pre-defined
hash function on each of the elements.</p>

Expand Down Expand Up @@ -197,7 +200,7 @@ <h3 class="function"><a name="xor8_buffered-1">xor8_buffered/1</a></h3>

<h3 class="function"><a name="xor8_buffered-2">xor8_buffered/2</a></h3>
<div class="spec">
<p><tt>xor8_buffered(List::list(), HashFunction::atom() | function()) -&gt; {reference(), atom() | function()} | {error, atom()}</tt><br></p>
<p><tt>xor8_buffered(List::list(), HashFunction::<a href="#type-hash_function">hash_function()</a>) -&gt; {reference(), <a href="#type-hash_function">hash_function()</a>} | {error, atom()}</tt><br></p>
</div><p><p>Similar to the initialize function, but is a buffered version for lists
that are over 100,000,000 keys. Use for greater speed.</p>

Expand All @@ -207,7 +210,7 @@ <h3 class="function"><a name="xor8_buffered-2">xor8_buffered/2</a></h3>

<h3 class="function"><a name="xor8_contain-2">xor8_contain/2</a></h3>
<div class="spec">
<p><tt>xor8_contain(X1::{reference(), atom() | function()}, Key::term()) -&gt; true | false</tt><br></p>
<p><tt>xor8_contain(X1::{reference(), <a href="#type-hash_function">hash_function()</a>}, Key::term()) -&gt; true | false</tt><br></p>
</div><p><p>Tests to see if the passed argument is in the filter. The first
argument must be the pre-initialized filter.</p>

Expand All @@ -220,7 +223,7 @@ <h3 class="function"><a name="xor8_contain-2">xor8_contain/2</a></h3>

<h3 class="function"><a name="xor8_contain-3">xor8_contain/3</a></h3>
<div class="spec">
<p><tt>xor8_contain(X1::{reference(), atom() | function()}, Key::term(), ReturnValue::any()) -&gt; true | any()</tt><br></p>
<p><tt>xor8_contain(X1::{reference(), <a href="#type-hash_function">hash_function()</a>}, Key::term(), ReturnValue::any()) -&gt; true | any()</tt><br></p>
</div><p><p>Tests to see if the passed argument is in the filter. The first
argument must be the pre-initialized filter.</p>

Expand All @@ -230,18 +233,17 @@ <h3 class="function"><a name="xor8_contain-3">xor8_contain/3</a></h3>

<h3 class="function"><a name="xor8_from_bin-1">xor8_from_bin/1</a></h3>
<div class="spec">
<p><tt>xor8_from_bin(Bin) -&gt; any()</tt></p>
</div>

<h3 class="function"><a name="xor8_from_bin-2">xor8_from_bin/2</a></h3>
<div class="spec">
<p><tt>xor8_from_bin(Bin, Hash) -&gt; any()</tt></p>
</div>
<p><tt>xor8_from_bin(X1::{binary(), <a href="#type-hash_function">hash_function()</a>}) -&gt; {reference(), <a href="#type-hash_function">hash_function()</a>}</tt><br></p>
</div><p><p>Deserialize the filter from a previous <code>xor8_to_bin</code> call.</p>

Returns <code>{reference(), hash_function()}</code></p>

<h3 class="function"><a name="xor8_to_bin-1">xor8_to_bin/1</a></h3>
<div class="spec">
<p><tt>xor8_to_bin(X1) -&gt; any()</tt></p>
</div>
<p><tt>xor8_to_bin(X1::{reference(), <a href="#type-hash_function">hash_function()</a>}) -&gt; {binary(), <a href="#type-hash_function">hash_function()</a>}</tt><br></p>
</div><p><p>Serialize the filter to a binary</p>

Returns <code>{binary(), hash_function()}</code>.</p>
<hr>

<div class="navbar"><a name="#navbar_bottom"></a><table width="100%" border="0" cellspacing="0" cellpadding="2" summary="navigation bar"><tr><td><a href="overview-summary.html" target="overviewFrame">Overview</a></td><td><a href="http://www.erlang.org/"><img src="erlang.png" align="right" border="0" alt="erlang logo"></a></td></tr></table></div>
Expand Down
Binary file removed priv/xor_filter.so
Binary file not shown.
8 changes: 8 additions & 0 deletions rebar.config
Original file line number Diff line number Diff line change
Expand Up @@ -10,3 +10,11 @@
{post_hooks,
[{"(linux|darwin|solaris)", clean, "make -C c_src clean"},
{"(freebsd)", clean, "gmake -C c_src clean"}]}.

{xref_checks, [
undefined_function_calls,
undefined_functions,
locals_not_used,
deprecated_function_calls,
deprecated_functions
]}.
Loading

0 comments on commit 74f36f5

Please sign in to comment.