-
Notifications
You must be signed in to change notification settings - Fork 233
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Added Mutators to support changing object on read/write #661
Changes from 22 commits
9153e4c
a62a352
e3832e5
6369d64
4a23881
087bd33
13b1dac
1580dfa
9fb7fa5
fc4ca3c
22b9ed8
548974b
b9c9a7f
3355c87
727a8b2
f89adf0
f93f53d
4255be2
d072919
7595fa9
ac0795a
95c67c4
6f26412
2d3dd83
c52a1e6
4a45469
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,222 @@ | ||
%% ------------------------------------------------------------------- | ||
%% | ||
%% riak_kv_mutators - Storage and retrieval for get/put mutation | ||
%% functions | ||
%% | ||
%% Copyright (c) 2013 Basho Technologies, Inc. All Rights Reserved. | ||
%% | ||
%% This file is provided to you under the Apache License, | ||
%% Version 2.0 (the "License"); you may not use this file | ||
%% except in compliance with the License. You may obtain | ||
%% a copy of the License at | ||
%% | ||
%% http://www.apache.org/licenses/LICENSE-2.0 | ||
%% | ||
%% Unless required by applicable law or agreed to in writing, | ||
%% software distributed under the License is distributed on an | ||
%% "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY | ||
%% KIND, either express or implied. See the License for the | ||
%% specific language governing permissions and limitations | ||
%% under the License. | ||
%% | ||
%% ------------------------------------------------------------------- | ||
|
||
%% @doc There are circumstances where the object stored on disk is not | ||
%% the object to return; and there are times the object written to the | ||
%% data storage backend is not meant to be the object given. An | ||
%% example would be storing only meta data for an object on a remote | ||
%% cluster. This module is an interface to register mutators that | ||
%% can be run on get and put. | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. I can haz this sentence fixed next time you're in the file? |
||
%% | ||
%% This doubles as a behavior defining module for the mutators. | ||
%% | ||
%% == Callbacks == | ||
%% | ||
%% A mutator callback must implement 2 functions: mutate_put/5 and mutate_get/1. | ||
%% | ||
%% <code><b>mutate_put(MetaData, Value, ExposedMeta, | ||
%% FullObject, BucketProperties) -> Result</b></code> | ||
%% | ||
%% Types: | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. I think edoc can generate all this from dialyzer -type and -callback specs now. There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Looking into this (and trying it as well), edoc lists the functions but doesn't give type information. There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. I think if you did like -type metadata() :: dict(). and then used metadata() it'd show them better? There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. I think we need to define better. I'd love to be able to have it automatically use the correct types, but that's a ways out. Adding links to types is a next step, but would it be better inline (in the code element) or in the expanded area? |
||
%% ``` | ||
%% MetaData = dict() | ||
%% Value = term() | ||
%% ExposedMeta = dict() | ||
%% FullObject = riak_object:riak_object() | ||
%% BucketProperties = orddict:orddict() | ||
%% Result = {NewMeta, NewValue, NewExposedMeta} | ||
%% NewMeta = dict() | ||
%% NewValue = term() | ||
%% NewExposedMeta = dict()''' | ||
%% | ||
%% The mutate_put callback is called for each metadata/value pair a riak_object | ||
%% has. The return value of NewMeta and NewValue are used by the storage backend | ||
%% while the NewExposedMeta is used for the client return where NewMeta would | ||
%% normally be used. The NewExposedMeta is merged with the NewMeta to generate the | ||
%% exposed metadata; if the same key is found, the NewExposedMeta value is used. | ||
%% | ||
%% The mutations are run in the same process as the vnode. | ||
%% | ||
%% <code><b>mutate_get(Object) -> Result</b></code> | ||
%% | ||
%% Types: | ||
%% ``` | ||
%% Object = riak_object:riak_object() | ||
%% Result = riak_object:riak_object() | 'notfound' | ||
%% ''' | ||
%% Take the object from storage and reverse whatever mutation was applied. Note | ||
%% the bucket properties are not part of this callback, so if some data is | ||
%% important to reverse a mutation, it must be put in the metadata by the | ||
%% `mutate_put' function. Also note how the entire object is given as opposed to | ||
%% simply a metadata/value pair. Care must be taken not to corrupt the object. | ||
%% | ||
%% A return of ``'notfound''' stops the mutator chain and returns immediately. This | ||
%% provides an escape hatch of sorts; if the mutator cannot reverse the mutation | ||
%% effectively, return ``'notfound'''. | ||
|
||
-module(riak_kv_mutator). | ||
|
||
-export([register/1, register/2, unregister/1]). | ||
-export([get/0]). | ||
-export([mutate_put/2, mutate_get/1]). | ||
|
||
-callback mutate_put(Meta :: dict(), Value :: any(), ExposedMeta :: dict(), FullObject :: riak_object:riak_object(), BucketProps :: orddict:orddict()) -> {dict(), any(), dict()}. | ||
-callback mutate_get(FullObject :: riak_object:riak_object()) -> riak_object:riak_object() | 'notfound'. | ||
|
||
-define(DEFAULT_PRIORITY, 0). | ||
|
||
%% @doc Register `Module' as a mutator using the default priority of 0
%% (`?DEFAULT_PRIORITY').
%% @see register/2
-spec register(Module :: atom()) -> 'ok'.
register(Module) ->
    Priority = ?DEFAULT_PRIORITY,
    %% Fully qualified call, as in the rest of this module's API.
    ?MODULE:register(Module, Priority).
|
||
%% @doc Register a module as a mutator with the given priority. Modules with | ||
%% equal priority are run in sorted (alphabetical) order. A module | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. sort sort == sort ^ 2 ? |
||
%% can only be registered once. When there is a conflict (two different | ||
%% lists), those lists are merged. | ||
-spec register(Module :: atom(), Priority :: term()) -> 'ok'.
register(Module, Priority) ->
    %% Resolution strategy: the modifier is handed either `undefined'
    %% (nothing stored yet) or the list of currently stored sibling values.
    %% Siblings are first collapsed into a single orddict by
    %% merge_values/1, and the new {Module, Priority} entry is then stored
    %% on top of that, replacing any earlier entry for the same module.
    Store = fun(Mutators) -> orddict:store(Module, Priority, Mutators) end,
    Modifier = fun
        (undefined) ->
            Store(orddict:new());
        (Siblings) ->
            Store(merge_values(Siblings))
    end,
    riak_core_metadata:put({riak_kv, mutators}, list, Modifier).
|
||
%% @doc Remove `Module' from the registered mutator list. Unregistering a
%% module when nothing has been stored yet leaves an empty list behind.
-spec unregister(Module :: atom()) -> 'ok'.
unregister(Module) ->
    Modifier = fun(Existing) ->
        case Existing of
            undefined ->
                [];
            Siblings ->
                %% Collapse sibling values into one orddict before erasing.
                orddict:erase(Module, merge_values(Siblings))
        end
    end,
    riak_core_metadata:put({riak_kv, mutators}, list, Modifier, []).
|
||
%% @doc Retrieve the list of mutators in the order to apply them when doing
%% a put mutation. To get the order when doing a get mutation, reverse the
%% list. The result is wrapped as `{ok, Modules}'.
%%
%% Mutators are ordered by ascending priority; mutators that share a
%% priority are ordered by module name. Conflicting stored values are
%% resolved by merging them with merge_values/1, and a deleted value is
%% treated as contributing no mutators.
-spec get() -> {ok, [atom()]}.
get() ->
    %% '$deleted' is the metadata tombstone handed to resolvers. Nothing in
    %% this module deletes the key, but it is handled here anyway.
    Resolver = fun
        ('$deleted', '$deleted') ->
            [];
        ('$deleted', Values) ->
            Values;
        (Values, '$deleted') ->
            Values;
        (Values1, Values2) ->
            merge_values([Values1, Values2])
    end,
    Opts = [{default, []}, {resolver, Resolver}],
    ModulesAndPriors = riak_core_metadata:get({riak_kv, mutators}, list, Opts),
    %% Flip to {Priority, Module} so a plain sort orders by priority first
    %% and module name second.
    %% NOTE(review): this sort runs on every call; if many mutators are ever
    %% registered, consider storing the list already ordered.
    Sorted = lists:sort([{P, M} || {M, P} <- ModulesAndPriors]),
    {ok, [M || {_P, M} <- Sorted]}.
|
||
%% @doc Unmutate an object after retrieval from storage. When an object is
%% mutated, the mutators applied are put into the object's metadata under
%% the `mutators_applied' key; they are undone here in reverse order.
%%
%% Returns the object unchanged when no mutators are recorded, and
%% `notfound' when any mutator's `mutate_get/1' callback returns `notfound'.
-spec mutate_get(Object :: riak_object:riak_object()) ->
    riak_object:riak_object() | 'notfound'.
mutate_get(Object) ->
    %% NOTE(review): only the first sibling's metadata is consulted. If that
    %% sibling carries no `mutators_applied' entry, the object is returned
    %% as-is even if later siblings were mutated -- confirm this is intended.
    [Meta | _] = riak_object:get_metadatas(Object),
    case dict:find(mutators_applied, Meta) of
        {ok, Applied} ->
            mutate_get(Object, lists:reverse(Applied));
        error ->
            Object
    end.
|
||
%% Run each mutator's mutate_get/1 callback over the object in list order,
%% stopping as soon as one of them answers `notfound'.
mutate_get(Object, [Mutator | Remaining]) ->
    %% Even though the mutate_put callback has to return
    %% {Meta, Value, Exposed} values, the get callback gets away with just
    %% giving the object. This is to avoid complicated interaction with the
    %% notfound return.
    %%
    %% NOTE: Mutator is a module name taken from the object's metadata. If
    %% that module is no longer available (e.g. after a downgrade), this
    %% call fails, so objects written with it cannot be read back.
    case Mutator:mutate_get(Object) of
        notfound ->
            notfound;
        Unmutated ->
            mutate_get(Unmutated, Remaining)
    end;
mutate_get(Object, []) ->
    Object.
|
||
%% @doc Mutate an object in preparation for storage, returning a tuple of
%% the object to store and the object to return to the client.
%%
%% For each sibling ({Meta, Value} pair) of the object, every registered
%% mutator is called in order, starting with that sibling's Meta and Value
%% and an empty exposed-metadata dict. Later mutators are given the results
%% of previous mutators. The list of mutators applied is then recorded in
%% the sibling's metadata under `mutators_applied'.
%%
%% Once all mutations are complete, two {@link riak_object:riak_object()}s
%% are returned. The first is what is to be stored. The second carries the
%% same values, but each sibling's metadata is overlaid with the exposed
%% metadata (the exposed value wins when a key is present in both).
-spec mutate_put(Object :: riak_object:riak_object(),
                 BucketProps :: orddict:orddict()) ->
    {riak_object:riak_object(), riak_object:riak_object()}.
mutate_put(Object, BucketProps) ->
    Contents = riak_object:get_contents(Object),
    {ok, Modules} = ?MODULE:get(),
    %% Why not just give the mutators a riak_object to rewrite? Because of a
    %% warning in riak_object stating that set_contents is for internal use
    %% only; it is therefore only called from here.
    ApplyMutator = fun(Module, {Meta, Value, Exposed}) ->
        Module:mutate_put(Meta, Value, Exposed, Object, BucketProps)
    end,
    %% Run the whole mutator chain over one sibling and tag the result with
    %% the mutators that were applied.
    MutateSibling = fun({Meta0, Value0}) ->
        {Meta1, Value1, Exposed} =
            lists:foldl(ApplyMutator, {Meta0, Value0, dict:new()}, Modules),
        {dict:store(mutators_applied, Modules, Meta1), Value1, Exposed}
    end,
    Results = lists:map(MutateSibling, Contents),
    StoredContents = [{M, V} || {M, V, _Exposed} <- Results],
    Stored = riak_object:set_contents(Object, StoredContents),
    %% On a key clash the exposed metadata value replaces the stored one.
    PreferExposed = fun(_Key, _StoredVal, ExposedVal) -> ExposedVal end,
    ClientContents = lists:map(
        fun({Meta, Value, Exposed}) ->
            {dict:merge(PreferExposed, Meta, Exposed), Value}
        end,
        Results
    ),
    Client = riak_object:set_contents(Object, ClientContents),
    {Stored, Client}.
|
||
%% Collapse a list of stored values into a single orddict of
%% {Module, Priority} pairs. Entries that are not lists (such as the
%% '$deleted' atom handled in get/0) are ignored; an empty input, or one
%% with no list entries, yields [].
merge_values(Values) ->
    Orddicts = lists:filter(fun erlang:is_list/1, Values),
    case Orddicts of
        [First | Rest] ->
            merge_values(Rest, First);
        [] ->
            []
    end.
|
||
%% Merge each orddict in the list into the accumulator, left to right,
%% resolving modules present on both sides with merge_fun/3.
merge_values(Orddicts, Acc) ->
    lists:foldl(
        fun(Orddict, Merged) ->
            orddict:merge(fun merge_fun/3, Merged, Orddict)
        end,
        Acc,
        Orddicts
    ).
|
||
%% Conflict resolution for a module registered with two priorities: keep
%% the lower one. When the two compare equal, the second is returned.
merge_fun(_Key, P1, P2) ->
    case P1 < P2 of
        true -> P1;
        false -> P2
    end.
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
This capability is not being queried anywhere that I can see.
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
It is used in repl before attempting to install the reduced repl mutator: https://github.com/basho/riak_repl/blob/develop/src/riak_repl_sup.erl#L20