Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

remove from cluster #825

Merged
merged 2 commits into from
May 11, 2016
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 1 addition & 1 deletion Makefile
Original file line number Diff line number Diff line change
Expand Up @@ -5,7 +5,7 @@ EJD_INCLUDE = $(EJABBERD_DIR)/include
EJD_PRIV = $(EJABBERD_DIR)/priv
XEP_TOOL = tools/xep_tool
EJD_EBIN = $(EJABBERD_DIR)/ebin
DEVNODES = node1 node2 fed1
DEVNODES = node1 node2 node3 fed1

all: deps compile

Expand Down
61 changes: 53 additions & 8 deletions apps/ejabberd/src/ejabberd_admin.erl
Original file line number Diff line number Diff line change
Expand Up @@ -45,7 +45,8 @@
mnesia_change_nodename/4,
restore/1, % Still used by some modules%%
get_loglevel/0,
join_cluster/1, leave_cluster/0]).
join_cluster/1, leave_cluster/0,
remove_from_cluster/1]).

-include("ejabberd.hrl").
-include("ejabberd_commands.hrl").
Expand All @@ -69,10 +70,6 @@ commands() ->
desc = "Get status of the ejabberd server",
module = ?MODULE, function = status,
args = [], result = {res, restuple}},
#ejabberd_commands{name = stop, tags = [server],
desc = "Stop ejabberd gracefully",
module = init, function = stop,
args = [], result = {res, rescode}},
#ejabberd_commands{name = restart, tags = [server],
desc = "Restart ejabberd gracefully",
module = init, function = restart,
Expand Down Expand Up @@ -150,27 +147,75 @@ commands() ->
module = ejabberd_config, function = reload_cluster,
args = [], result = {res, restuple}},
#ejabberd_commands{name = join_cluster, tags = [server],
desc = "Join the node to the cluster",
desc = "Join the node to a cluster. Call it from the joining node.
Use `-f` or `--force` flag to avoid question prompt and force join the node",
module = ?MODULE, function = join_cluster,
args = [{node, string}],
result = {res, restuple}},
#ejabberd_commands{name = leave_cluster, tags = [server],
desc = "Leave a node from the cluster",
desc = "Leave a node from the cluster. Call it from the node that is going to leave.
Use `-f` or `--force` flag to avoid question prompt and force leave the node from cluster",
module = ?MODULE, function = leave_cluster,
args = [],
result = {res, restuple}},
#ejabberd_commands{name = remove_from_cluster, tags = [server],
desc = "Remove dead node from the cluster. Call it from the member of the cluster.
Use `-f` or `--force` flag to avoid question prompt and force remove the node",
module = ?MODULE, function = remove_from_cluster,
args = [{node, string}],
result = {res, restuple}}
].


%%%
%%% Server management
%%%
%% @doc Remove the node named by `NodeString' from the cluster.
%% The name is converted to an atom and checked with
%% mongoose_cluster:is_node_alive/1: a node reported alive is asked over RPC
%% to leave, any other node is removed as a dead node.
-spec remove_from_cluster(string()) -> {ok, string()} |
                                       {node_is_alive, string()} |
                                       {mnesia_error, string()} |
                                       {rpc_error, string()}.
remove_from_cluster(NodeString) ->
    Target = list_to_atom(NodeString),
    case mongoose_cluster:is_node_alive(Target) of
        false ->
            remove_dead_node(Target);
        true ->
            remove_rpc_alive_node(Target)
    end.

%% Remove a node that is expected to be down from the cluster.
%%
%% Delegates to mongoose_cluster:remove_from_cluster/1 and converts its
%% outcome into a tagged tuple whose second element is the chardata
%% produced by io_lib:format/2:
%%   {ok, Msg}            - the call returned ok
%%   {node_is_alive, Msg} - it raised error:{node_is_alive, DeadNode}
%%   {mnesia_error, Msg}  - it raised error:{del_table_copy_schema, Reason}
%% Any other exception is not caught and propagates to the caller.
remove_dead_node(DeadNode) ->
    try mongoose_cluster:remove_from_cluster(DeadNode) of
        ok ->
            Msg = io_lib:format("The dead node ~p has been removed from the cluster~n", [DeadNode]),
            {ok, Msg}
    catch
        %% DeadNode is already bound, so only an error about this very node matches.
        error:{node_is_alive, DeadNode} ->
            Msg = io_lib:format("The node ~p is alive but should not be.~n", [DeadNode]),
            {node_is_alive, Msg};
        error:{del_table_copy_schema, Reason} ->
            %% Newline goes at the end of the message, not in the middle of the sentence.
            Msg = io_lib:format("Cannot delete table schema. Reason: ~p~n", [Reason]),
            {mnesia_error, Msg}
    end.

%% Ask a running node to leave the cluster by calling
%% mongoose_cluster:leave/0 on it through rpc:call/4.
%%
%% Returns {ok, Msg} when the remote call returns ok, and {rpc_error, Msg}
%% both for {badrpc, Reason} and for any other unexpected return value.
%% Msg is the chardata produced by io_lib:format/2.
remove_rpc_alive_node(AliveNode) ->
    case rpc:call(AliveNode, mongoose_cluster, leave, []) of
        ok ->
            Msg = io_lib:format("The node ~p has been removed from the cluster~n", [AliveNode]),
            {ok, Msg};
        {badrpc, Reason} ->
            %% Newline goes at the end of the message, not in the middle of the sentence.
            Msg = io_lib:format("Cannot remove the node ~p. RPC Reason: ~p~n", [AliveNode, Reason]),
            {rpc_error, Msg};
        Unknown ->
            Msg = io_lib:format("Unknown error: ~p~n", [Unknown]),
            {rpc_error, Msg}
    end.

-spec join_cluster(string()) -> {ok, string()} | {pang, string()} | {alread_joined, string()} |
{mnesia_error, string()} | {error, string()}.
join_cluster(NodeString) ->
NodeAtom = list_to_atom(NodeString),
NodeList = mnesia:system_info(running_db_nodes),
NodeList = mnesia:system_info(db_nodes),
case lists:member(NodeAtom, NodeList) of
true ->
String = io_lib:format("The node ~s has already joined the cluster~n", [NodeString]),
Expand Down
33 changes: 27 additions & 6 deletions apps/ejabberd/src/mongoose_cluster.erl
Original file line number Diff line number Diff line change
Expand Up @@ -4,8 +4,7 @@

%% TODO: it might make sense to expose this stuff as mod_admin_extra_cluster

-export([join/1,
leave/0]).
-export([join/1, leave/0, remove_from_cluster/1, is_node_alive/1]).

-include("ejabberd.hrl").

Expand All @@ -17,8 +16,6 @@
%% This drops all current connections and discards all persistent
%% data from Mnesia. Use with caution!
%% Next time the node starts, it will connect to other members automatically.
%% TODO: when/if exposing through ejabberd_admin make sure it's guarded
%% by an interactive yes/no question or some flag
-spec join(node()) -> ok.
join(ClusterMember) ->
?INFO_MSG("join ~p", [ClusterMember]),
Expand All @@ -33,8 +30,6 @@ join(ClusterMember) ->
%% data from Mnesia. Use with caution!
%% Next time the node starts, it will NOT connect to previous members.
%% Remaining members will remove this node from the cluster Mnesia schema.
%% TODO: when/if exposing through ejabberd_admin make sure it's guarded
%% by an interactive yes/no question or some flag
-spec leave() -> ok.
leave() ->
?INFO_MSG("leave", []),
Expand All @@ -46,10 +41,36 @@ leave() ->
ok = mnesia:start()
end).

%% @doc Remove a dead node from the cluster.
%% The node being removed must be down: if is_node_alive/1 reports it as
%% alive, the call fails with error({node_is_alive, Node}).
-spec remove_from_cluster(node()) -> ok.
remove_from_cluster(Node) ->
    case is_node_alive(Node) of
        true ->
            error({node_is_alive, Node});
        false ->
            remove_dead_from_cluster(Node)
    end.

%%
%% Helpers
%%

%% Log the removal, then delete DeadNode's copy of the schema table with
%% mnesia:del_table_copy/2. Returns ok on {atomic, ok}; an aborted
%% transaction is raised as error({del_table_copy_schema, Reason}).
remove_dead_from_cluster(DeadNode) ->
    ?INFO_MSG("removing dead node ~p from the cluster", [DeadNode]),
    Result = mnesia:del_table_copy(schema, DeadNode),
    case Result of
        {aborted, Reason} ->
            error({del_table_copy_schema, Reason});
        {atomic, ok} ->
            ok
    end.

%% @doc Tell whether `Node' passes check_networking/1.
%% Returns `true' when check_networking/1 returns `true'. Any exception of
%% class `error' raised by that call is treated as "not alive" and yields
%% `false'. Exceptions of class `throw' or `exit' are not caught, and a
%% return value other than `true' is not handled by the `of' section.
-spec is_node_alive(node()) -> boolean().
is_node_alive(Node) ->
    try check_networking(Node) of
        true ->
            true
    catch
        error:_ ->
            false
    end.

%% True when App is the name of one of the entries returned by
%% application:which_applications/0, false otherwise.
is_app_running(App) ->
    Running = application:which_applications(),
    case lists:keyfind(App, 1, Running) of
        false ->
            false;
        _Entry ->
            true
    end.

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -129,14 +129,25 @@ Exit shell and start MongooseIM using `mongooseim start/live`

#### MongooseIM 1.7.0 and newer

In case to remove a node from the cluster call:
Since MongooseIM 1.7.0 there are two commands for removing a node from the cluster.
To make a running node leave the cluster, call:

```bash
mongooseimctl leave_cluster
```

It only makes sense if the node is part of a cluster, e.g. `join_cluster` was called from that node before.
The successful output from above command starts with `You have successfully left the node`.
The successful output from above command starts with `You have successfully left the node from the cluster`.

To remove another node from the cluster, call the following command from one of the cluster members:

```bash
mongooseimctl remove_from_cluster RemoteNodeName
```

Here `RemoteNodeName` is the name of the node that we'd like to remove from our cluster. This command is useful when
the node is dead and not responding and we'd like to remove it remotely.
The successful output from the above command starts with `The node` (or `The dead node`), followed by the node name and `has been removed from the cluster`.

#### MongooseIM 1.5.0 - 1.7.0

Expand Down
77 changes: 53 additions & 24 deletions rel/files/mongooseimctl
Original file line number Diff line number Diff line change
Expand Up @@ -31,44 +31,70 @@ if [ -z "$NODENAME_ARG" ]; then
echo "vm.args needs to have either -name or -sname parameter."
exit 1
fi
FORCE_FLAG1='"--force"'
FORCE_FLAG2='"-f"'

NAME_TYPE="${NODENAME_ARG% *}"
NODENAME="${NODENAME_ARG#* }"

join_cluster()
{
echo "Warning. This will drop all current connections and will discard all persistent data from Mnesia.
Do you want to continue? (yes/no)"
read GUARD

if [ $GUARD = "yes" ]; then
echo "Joining the cluster..."
ctl $QUOTED_ARGS
else
echo "Operation discarded by user"
exit 1
fi
WARNING="Warning. This will drop all current connections and will discard all persistent data from Mnesia.
Do you want to continue? (yes/no)"
DISPLAY="Joining the cluster..."
manage_cluster "$WARNING" "$DISPLAY"
}

leave_cluster()
{
echo "Warning. This will drop all current connections and will discard all persistent data from Mnesia.
Do you want to continue? (yes/no)"
read GUARD

if [ $GUARD = "yes" ]; then
echo "Leaving the cluster..."
ctl $QUOTED_ARGS
else
echo "Operation discarded by user"
exit 1
fi
WARNING="Warning. This will drop all current connections and will discard all persistent data from Mnesia.
Do you want to continue? (yes/no)"
DISPLAY="Leaving the cluster..."

manage_cluster "$WARNING" "$DISPLAY"
}

# Handler for the remove_from_cluster command.
# Sets the progress message (DISPLAY) and the confirmation prompt (WARNING),
# then hands both to manage_cluster.
remove_from_cluster()
{
    DISPLAY="Removing node from the cluster..."
    WARNING="Warning. If the node is alive this will drop all current connections on the remote node and will discard all persistent data from Mnesia.
Do you want to continue? (yes/no)"
    manage_cluster "$WARNING" "$DISPLAY"
}

# Run the cluster command held in $QUOTED_ARGS through ctl, asking the user
# for confirmation first unless a force flag is present.
#   $1 - warning/question printed before each attempt to read an answer
#   $2 - progress message printed once the user answers "yes"
# Reads $QUOTED_ARGS, $FORCE_FLAG1 and $FORCE_FLAG2; rewrites $QUOTED_ARGS when
# a force flag is stripped. Exits with status 1 when the user answers "no" or
# when no answer can be read.
manage_cluster()
{
    case $QUOTED_ARGS in
        *"$FORCE_FLAG1"*|*"$FORCE_FLAG2"*)
            # Strip the force flags so they are not forwarded to ctl.
            QUOTED_ARGS=$(echo $QUOTED_ARGS|sed "s/$FORCE_FLAG1//")
            QUOTED_ARGS=$(echo $QUOTED_ARGS|sed "s/$FORCE_FLAG2//")
            ctl $QUOTED_ARGS;;
        *)
            GUARD="unknown"
            until [ "$GUARD" = "yes" ] || [ "$GUARD" = "no" ] ; do
                # Quoted so the line break inside the warning text is kept.
                echo "$1"
                # A failed read (EOF, closed stdin) would otherwise leave GUARD
                # empty and make this loop spin forever.
                if ! read -r GUARD; then
                    echo "Operation discarded: no answer could be read"
                    exit 1
                fi
                if [ "$GUARD" = "yes" ]; then
                    echo "$2"
                    ctl $QUOTED_ARGS
                elif [ "$GUARD" = "no" ]; then
                    echo "Operation discarded by user"
                    exit 1
                else
                    echo "Command unknown. Do you want to continue? (yes/no)"
                fi
            done;;
    esac
}

# Start the node by running the mongooseim script found in $RUNNER_SCRIPT_DIR.
start ()
{
    # Quoted so a directory path containing whitespace is not word-split.
    "$RUNNER_SCRIPT_DIR"/mongooseim start
}
# Stop the node by running the mongooseim script found in $RUNNER_SCRIPT_DIR.
stop ()
{
    # Quoted so a directory path containing whitespace is not word-split.
    "$RUNNER_SCRIPT_DIR"/mongooseim stop
}

# attach to server
debug ()
Expand All @@ -90,8 +116,9 @@ help ()
echo " debug Attach an interactive Erlang shell to a running MongooseIM node"
echo " live Start MongooseIM node in live (interactive) mode"
echo "MongooseIM cluster management commads:"
echo " join_cluster other_node_name Add current node to cluster"
echo " leave_cluster Leave current node from the cluster"
echo " join_cluster other_node_name Add current node to cluster"
echo " leave_cluster Leave current node from the cluster"
echo " remove_from_cluster other_node_name Remove dead node from the cluster"
echo ""
}

Expand Down Expand Up @@ -239,8 +266,10 @@ done

case $1 in
'start') start;;
'stop') stop;;
'join_cluster') join_cluster;;
'leave_cluster') leave_cluster;;
'remove_from_cluster') remove_from_cluster;;
'debug') debug;;
'live') live;;
'started') wait_for_status 0 30 2;; # wait 30x2s before timeout
Expand Down
2 changes: 0 additions & 2 deletions rel/files/nodetool
Original file line number Diff line number Diff line change
Expand Up @@ -52,8 +52,6 @@ main(Args) ->
io:format("~p\n", [rpc:call(TargetNode, init, restart, [], 60000)]);
["reboot"] ->
io:format("~p\n", [rpc:call(TargetNode, init, reboot, [], 60000)]);
["remove_from_cluster", DeadNode] ->
io:format("~p\n", [rpc:call(TargetNode, mnesia, del_table_copy, [schema, list_to_atom(DeadNode)] )]);
["rpc", Module, Function | RpcArgs] ->
case rpc:call(TargetNode, list_to_atom(Module), list_to_atom(Function),
[RpcArgs], 60000) of
Expand Down
28 changes: 28 additions & 0 deletions rel/reltool_vars/node3_vars.config
Original file line number Diff line number Diff line change
@@ -0,0 +1,28 @@
%% Variables for the node named mongooseim3@localhost (see node_name below).
%% Several values are strings that themselves contain Erlang terms; they are
%% presumably substituted verbatim into a generated config file - TODO confirm
%% against the template that consumes this file.
{hosts, "[\"localhost\",
\"anonymous.localhost\",
\"localhost.bis\"
]"}.
{outgoing_s2s_port, 5295}.
{odbc_server, ""}.
{s2s_addr, "{ {s2s_addr, \"localhost2\"}, {127,0,0,1} }."}.
{s2s_default_policy, allow}.
{node_name, "mongooseim3@localhost"}.
{ejabberd_c2s_port, 5262}.
{ejabberd_s2s_in_port, 5291}.
{cowboy_port, 5283}.
{cowboy_port_secure, 5290}.
{ejabberd_service, ""}.
{mod_last, "{mod_last, []},"}.
{mod_privacy, "{mod_privacy, []},"}.
{mod_private, "{mod_private, []},"}.
{mod_roster, "{mod_roster, []},"}.
{mod_http_notification, "{mod_http_notification, []},"}.
{http_api_endpoint, "{5296, \"127.0.0.1\"}"}.
{mod_vcard, "{mod_vcard, [ %{matches, 1},\n"
"%{search, true},\n"
"%{host, directory.@HOST@}\n"
"]},"}.
{s2s_use_starttls, "{s2s_use_starttls, optional}."}.
{s2s_certfile, "{s2s_certfile, \"priv/ssl/fake_server.pem\"}."}.
{tls_config, "{certfile, \"priv/ssl/fake_server.pem\"}, starttls, {ciphers, \"DHE-RSA-AES256-SHA\"},"}.
{secondary_c2s, ""}.
4 changes: 4 additions & 0 deletions test/ejabberd_tests/test.config
Original file line number Diff line number Diff line change
Expand Up @@ -31,6 +31,10 @@
{domain, <<"localhost">>},
{vars, "node2_vars.config"},
{cluster, mim}]},
{mim3, [{node, mongooseim3@localhost},
{domain, <<"localhost">>},
{vars, "node3_vars.config"},
{cluster, mim}]},
{fed, [{node, fed1@localhost},
{domain, <<"fed1">>},
{vars, "fed1_vars.config"},
Expand Down
Loading