Skip to content

Commit

Permalink
Merge pull request #825 from esl/remove-from-cluster
Browse files Browse the repository at this point in the history
remove from cluster
  • Loading branch information
michalwski committed May 11, 2016
2 parents 9afda56 + 1db5345 commit 3b89d5d
Show file tree
Hide file tree
Showing 13 changed files with 393 additions and 152 deletions.
2 changes: 1 addition & 1 deletion Makefile
Original file line number Diff line number Diff line change
Expand Up @@ -5,7 +5,7 @@ EJD_INCLUDE = $(EJABBERD_DIR)/include
EJD_PRIV = $(EJABBERD_DIR)/priv
XEP_TOOL = tools/xep_tool
EJD_EBIN = $(EJABBERD_DIR)/ebin
DEVNODES = node1 node2 fed1
DEVNODES = node1 node2 node3 fed1

all: deps compile

Expand Down
61 changes: 53 additions & 8 deletions apps/ejabberd/src/ejabberd_admin.erl
Original file line number Diff line number Diff line change
Expand Up @@ -45,7 +45,8 @@
mnesia_change_nodename/4,
restore/1, % Still used by some modules%%
get_loglevel/0,
join_cluster/1, leave_cluster/0]).
join_cluster/1, leave_cluster/0,
remove_from_cluster/1]).

-include("ejabberd.hrl").
-include("ejabberd_commands.hrl").
Expand All @@ -69,10 +70,6 @@ commands() ->
desc = "Get status of the ejabberd server",
module = ?MODULE, function = status,
args = [], result = {res, restuple}},
#ejabberd_commands{name = stop, tags = [server],
desc = "Stop ejabberd gracefully",
module = init, function = stop,
args = [], result = {res, rescode}},
#ejabberd_commands{name = restart, tags = [server],
desc = "Restart ejabberd gracefully",
module = init, function = restart,
Expand Down Expand Up @@ -150,27 +147,75 @@ commands() ->
module = ejabberd_config, function = reload_cluster,
args = [], result = {res, restuple}},
#ejabberd_commands{name = join_cluster, tags = [server],
desc = "Join the node to the cluster",
desc = "Join the node to a cluster. Call it from the joining node.
Use `-f` or `--force` flag to avoid question prompt and force join the node",
module = ?MODULE, function = join_cluster,
args = [{node, string}],
result = {res, restuple}},
#ejabberd_commands{name = leave_cluster, tags = [server],
desc = "Leave a node from the cluster",
desc = "Leave a node from the cluster. Call it from the node that is going to leave.
Use `-f` or `--force` flag to avoid question prompt and force leave the node from cluster",
module = ?MODULE, function = leave_cluster,
args = [],
result = {res, restuple}},
#ejabberd_commands{name = remove_from_cluster, tags = [server],
desc = "Remove dead node from the cluster. Call it from the member of the cluster.
Use `-f` or `--force` flag to avoid question prompt and force remove the node",
module = ?MODULE, function = remove_from_cluster,
args = [{node, string}],
result = {res, restuple}}
].


%%%
%%% Server management
%%%
%% @doc Remove the node named by `NodeString' from the cluster.
%% A node that mongoose_cluster:is_node_alive/1 reports as alive is asked,
%% over RPC, to leave the cluster itself; any other node is removed as a
%% dead node from the local side.
%% The result is a tag paired with a human-readable message.
-spec remove_from_cluster(string()) -> {ok, string()} |
                                       {node_is_alive, string()} |
                                       {mnesia_error, string()} |
                                       {rpc_error, string()}.
remove_from_cluster(NodeString) ->
    Node = list_to_atom(NodeString),
    case mongoose_cluster:is_node_alive(Node) of
        true -> remove_rpc_alive_node(Node);
        false -> remove_dead_node(Node)
    end.

%% Remove a node that is expected to be down by delegating to
%% mongoose_cluster:remove_from_cluster/1.
%% Returns {ok, Msg} on success. The two errors that call is known to raise
%% are translated into tagged results so the command layer can print them:
%%   {node_is_alive, Msg} - the node turned out to be alive after all,
%%   {mnesia_error, Msg}  - the schema copy could not be deleted.
%% Any other exception is left to propagate.
remove_dead_node(DeadNode) ->
    try mongoose_cluster:remove_from_cluster(DeadNode) of
        ok ->
            {ok, io_lib:format("The dead node ~p has been removed from the cluster~n",
                               [DeadNode])}
    catch
        error:{node_is_alive, DeadNode} ->
            {node_is_alive, io_lib:format("The node ~p is alive but should not be.~n",
                                          [DeadNode])};
        error:{del_table_copy_schema, Reason} ->
            %% Keep the whole sentence on one line and end it with a newline,
            %% like the other messages returned from here.
            {mnesia_error, io_lib:format("Cannot delete table schema. Reason: ~p~n",
                                         [Reason])}
    end.

%% Ask a live node to leave the cluster by calling mongoose_cluster:leave/0
%% on it over RPC.
%% Returns {ok, Msg} when the remote call answers `ok'. A failed RPC, or any
%% other return value, yields {rpc_error, Msg}.
remove_rpc_alive_node(AliveNode) ->
    case rpc:call(AliveNode, mongoose_cluster, leave, []) of
        ok ->
            {ok, io_lib:format("The node ~p has been removed from the cluster~n",
                               [AliveNode])};
        {badrpc, Reason} ->
            %% Keep the whole sentence on one line and end it with a newline,
            %% like the other messages returned from here.
            {rpc_error, io_lib:format("Cannot remove the node ~p. RPC Reason: ~p~n",
                                      [AliveNode, Reason])};
        Unknown ->
            {rpc_error, io_lib:format("Unknown error: ~p~n", [Unknown])}
    end.

-spec join_cluster(string()) -> {ok, string()} | {pang, string()} | {alread_joined, string()} |
{mnesia_error, string()} | {error, string()}.
join_cluster(NodeString) ->
NodeAtom = list_to_atom(NodeString),
NodeList = mnesia:system_info(running_db_nodes),
NodeList = mnesia:system_info(db_nodes),
case lists:member(NodeAtom, NodeList) of
true ->
String = io_lib:format("The node ~s has already joined the cluster~n", [NodeString]),
Expand Down
33 changes: 27 additions & 6 deletions apps/ejabberd/src/mongoose_cluster.erl
Original file line number Diff line number Diff line change
Expand Up @@ -4,8 +4,7 @@

%% TODO: it might make sense to expose this stuff as mod_admin_extra_cluster

-export([join/1,
leave/0]).
-export([join/1, leave/0, remove_from_cluster/1, is_node_alive/1]).

-include("ejabberd.hrl").

Expand All @@ -17,8 +16,6 @@
%% This drops all current connections and discards all persistent
%% data from Mnesia. Use with caution!
%% Next time the node starts, it will connect to other members automatically.
%% TODO: when/if exposing through ejabberd_admin make sure it's guarded
%% by an interactive yes/no question or some flag
-spec join(node()) -> ok.
join(ClusterMember) ->
?INFO_MSG("join ~p", [ClusterMember]),
Expand All @@ -33,8 +30,6 @@ join(ClusterMember) ->
%% data from Mnesia. Use with caution!
%% Next time the node starts, it will NOT connect to previous members.
%% Remaining members will remove this node from the cluster Mnesia schema.
%% TODO: when/if exposing through ejabberd_admin make sure it's guarded
%% by an interactive yes/no question or some flag
-spec leave() -> ok.
leave() ->
?INFO_MSG("leave", []),
Expand All @@ -46,10 +41,36 @@ leave() ->
ok = mnesia:start()
end).

%% @doc Remove a dead node from the cluster.
%% The node being removed must be down: if is_node_alive/1 reports it as
%% alive, this raises error({node_is_alive, Node}) instead of removing it.
-spec remove_from_cluster(node()) -> ok.
remove_from_cluster(Node) ->
    case is_node_alive(Node) of
        true -> error({node_is_alive, Node});
        false -> remove_dead_from_cluster(Node)
    end.

%%
%% Helpers
%%

%% Delete DeadNode's copy of the Mnesia schema table.
%% Returns ok when the transaction commits; an aborted transaction is raised
%% as error({del_table_copy_schema, Reason}).
remove_dead_from_cluster(DeadNode) ->
    ?INFO_MSG("removing dead node ~p from the cluster", [DeadNode]),
    Result = mnesia:del_table_copy(schema, DeadNode),
    case Result of
        {aborted, Reason} -> error({del_table_copy_schema, Reason});
        {atomic, ok} -> ok
    end.

%% @doc Report whether Node passes check_networking/1.
%% Returns true when the check returns true. Any `error'-class exception
%% raised by the check is read as "not alive" and gives false; throws and
%% exits are not caught here.
-spec is_node_alive(node()) -> boolean().
is_node_alive(Node) ->
    try check_networking(Node) of
        true -> true
    catch
        error:_ -> false
    end.

%% True when App appears in the list of currently running applications
%% reported by application:which_applications/0.
is_app_running(App) ->
    Running = application:which_applications(),
    false =/= lists:keyfind(App, 1, Running).

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -129,14 +129,25 @@ Exit shell and start MongooseIM using `mongooseim start/live`

#### MongooseIM 1.7.0 and newer

To remove a node from the cluster, call:
Since MongooseIM 1.7.0 there are two commands for removing a node from the cluster.
To make a running node leave the cluster, call the following command on that node:

```bash
mongooseimctl leave_cluster
```

This makes sense only if the node is part of a cluster, e.g. `join_cluster` was called from that node before.
The successful output from the above command starts with `You have successfully left the node`.
The successful output from the above command starts with `You have successfully left the node from the cluster`.

In order to remove another node from the cluster, call the following command from one of the cluster members:

```bash
mongooseimctl remove_from_cluster RemoteNodeName
```

Where `RemoteNodeName` is the name of the node that we'd like to remove from our cluster. This command is useful when
the node is dead and not responding and we'd like to remove it remotely.
The successful output from the above command names the node and ends with `has been removed from the cluster`.

#### MongooseIM 1.5.0 - 1.7.0

Expand Down
77 changes: 53 additions & 24 deletions rel/files/mongooseimctl
Original file line number Diff line number Diff line change
Expand Up @@ -31,44 +31,70 @@ if [ -z "$NODENAME_ARG" ]; then
echo "vm.args needs to have either -name or -sname parameter."
exit 1
fi
FORCE_FLAG1='"--force"'
FORCE_FLAG2='"-f"'

NAME_TYPE="${NODENAME_ARG% *}"
NODENAME="${NODENAME_ARG#* }"

join_cluster()
{
echo "Warning. This will drop all current connections and will discard all persistent data from Mnesia.
Do you want to continue? (yes/no)"
read GUARD

if [ $GUARD = "yes" ]; then
echo "Joining the cluster..."
ctl $QUOTED_ARGS
else
echo "Operation discarded by user"
exit 1
fi
WARNING="Warning. This will drop all current connections and will discard all persistent data from Mnesia.
Do you want to continue? (yes/no)"
DISPLAY="Joining the cluster..."
manage_cluster "$WARNING" "$DISPLAY"
}

leave_cluster()
{
echo "Warning. This will drop all current connections and will discard all persistent data from Mnesia.
Do you want to continue? (yes/no)"
read GUARD

if [ $GUARD = "yes" ]; then
echo "Leaving the cluster..."
ctl $QUOTED_ARGS
else
echo "Operation discarded by user"
exit 1
fi
WARNING="Warning. This will drop all current connections and will discard all persistent data from Mnesia.
Do you want to continue? (yes/no)"
DISPLAY="Leaving the cluster..."

manage_cluster "$WARNING" "$DISPLAY"
}

# Remove another node from the cluster (the remove_from_cluster command).
# Hands the confirmation prompt and the progress message to manage_cluster,
# which runs the actual ctl call.
remove_from_cluster()
{
    # The texts are passed directly instead of through the global WARNING and
    # DISPLAY variables: sh has no function-local variables, and DISPLAY is
    # also the name of the X11 display variable, so assigning it here would
    # hand a bogus value to child processes whenever DISPLAY is exported.
    manage_cluster \
"Warning. If the node is alive this will drop all current connections on the remote node and will discard all persistent data from Mnesia.
Do you want to continue? (yes/no)" \
        "Removing node from the cluster..."
}

# manage_cluster WARNING PROGRESS
#
# Shared driver for the cluster commands. Runs `ctl $QUOTED_ARGS` either
# immediately, when one of the force flags is present in QUOTED_ARGS, or
# after the operator answers "yes" to WARNING ($1). PROGRESS ($2) is printed
# just before the command is run interactively. Answering "no" exits the
# script with status 1; any other answer asks again.
manage_cluster()
{
    case $QUOTED_ARGS in
        *"$FORCE_FLAG1"*|*"$FORCE_FLAG2"*)
            # Forced run: drop the first occurrence of each force flag so
            # that ctl does not receive it, then run without prompting.
            QUOTED_ARGS=$(printf '%s\n' "$QUOTED_ARGS" | sed -e "s/$FORCE_FLAG1//" -e "s/$FORCE_FLAG2//")
            # Left unquoted on purpose so the shell splits it into words.
            ctl $QUOTED_ARGS;;
        *)
            GUARD="unknown"
            until [ "$GUARD" = "yes" ] || [ "$GUARD" = "no" ] ; do
                # Quoted so a multi-line warning keeps its line break and is
                # not subject to globbing.
                echo "$1"
                # At end of input (e.g. stdin closed) read fails and leaves
                # GUARD empty; without this check the loop would never end.
                if ! read -r GUARD && [ -z "$GUARD" ]; then
                    echo "No answer received. Operation discarded"
                    exit 1
                fi
                if [ "$GUARD" = "yes" ]; then
                    echo "$2"
                    ctl $QUOTED_ARGS
                elif [ "$GUARD" = "no" ]; then
                    echo "Operation discarded by user"
                    exit 1
                else
                    echo "Command unknown. Do you want to continue? (yes/no)"
                fi
            done;;
    esac
}

# Start the node by delegating to the `mongooseim` runner script located in
# RUNNER_SCRIPT_DIR.
start ()
{
    # Quoted so an installation path containing spaces is not word-split.
    "$RUNNER_SCRIPT_DIR/mongooseim" start
}
# Stop the node by delegating to the `mongooseim` runner script located in
# RUNNER_SCRIPT_DIR.
stop ()
{
    # Quoted so an installation path containing spaces is not word-split.
    "$RUNNER_SCRIPT_DIR/mongooseim" stop
}

# attach to server
debug ()
Expand All @@ -90,8 +116,9 @@ help ()
echo " debug Attach an interactive Erlang shell to a running MongooseIM node"
echo " live Start MongooseIM node in live (interactive) mode"
echo "MongooseIM cluster management commads:"
echo " join_cluster other_node_name Add current node to cluster"
echo " leave_cluster Leave current node from the cluster"
echo " join_cluster other_node_name Add current node to cluster"
echo " leave_cluster Leave current node from the cluster"
echo " remove_from_cluster other_node_name Remove dead node from the cluster"
echo ""
}

Expand Down Expand Up @@ -239,8 +266,10 @@ done

case $1 in
'start') start;;
'stop') stop;;
'join_cluster') join_cluster;;
'leave_cluster') leave_cluster;;
'remove_from_cluster') remove_from_cluster;;
'debug') debug;;
'live') live;;
'started') wait_for_status 0 30 2;; # wait 30x2s before timeout
Expand Down
2 changes: 0 additions & 2 deletions rel/files/nodetool
Original file line number Diff line number Diff line change
Expand Up @@ -52,8 +52,6 @@ main(Args) ->
io:format("~p\n", [rpc:call(TargetNode, init, restart, [], 60000)]);
["reboot"] ->
io:format("~p\n", [rpc:call(TargetNode, init, reboot, [], 60000)]);
["remove_from_cluster", DeadNode] ->
io:format("~p\n", [rpc:call(TargetNode, mnesia, del_table_copy, [schema, list_to_atom(DeadNode)] )]);
["rpc", Module, Function | RpcArgs] ->
case rpc:call(TargetNode, list_to_atom(Module), list_to_atom(Function),
[RpcArgs], 60000) of
Expand Down
28 changes: 28 additions & 0 deletions rel/reltool_vars/node3_vars.config
Original file line number Diff line number Diff line change
@@ -0,0 +1,28 @@
%% Release variables for the third development node (mongooseim3@localhost).
%% Each term is a {Name, Value} pair; string values hold literal configuration
%% snippets.
%% NOTE(review): presumably these are substituted into the node's config
%% templates when the release is generated - confirm against the rel tooling.

%% XMPP domains served by this node.
{hosts, "[\"localhost\",
\"anonymous.localhost\",
\"localhost.bis\"
]"}.
{outgoing_s2s_port, 5295}.
{odbc_server, ""}.
%% NOTE(review): the address entry names "localhost2" although this is node3 -
%% confirm whether that is intended or carried over from another node's vars.
{s2s_addr, "{ {s2s_addr, \"localhost2\"}, {127,0,0,1} }."}.
{s2s_default_policy, allow}.
{node_name, "mongooseim3@localhost"}.
%% Listener ports for this node.
%% NOTE(review): presumably chosen not to clash with the other dev nodes
%% running on the same host - verify against their vars files.
{ejabberd_c2s_port, 5262}.
{ejabberd_s2s_in_port, 5291}.
{cowboy_port, 5283}.
{cowboy_port_secure, 5290}.
{ejabberd_service, ""}.
%% Module snippets; each module is given an empty option list.
{mod_last, "{mod_last, []},"}.
{mod_privacy, "{mod_privacy, []},"}.
{mod_private, "{mod_private, []},"}.
{mod_roster, "{mod_roster, []},"}.
{mod_http_notification, "{mod_http_notification, []},"}.
{http_api_endpoint, "{5296, \"127.0.0.1\"}"}.
%% Every option line inside the mod_vcard snippet starts with '%', so once the
%% snippet is placed in an Erlang-term config those options are comments.
{mod_vcard, "{mod_vcard, [ %{matches, 1},\n"
"%{search, true},\n"
"%{host, directory.@HOST@}\n"
"]},"}.
%% TLS settings; both snippets point at priv/ssl/fake_server.pem.
{s2s_use_starttls, "{s2s_use_starttls, optional}."}.
{s2s_certfile, "{s2s_certfile, \"priv/ssl/fake_server.pem\"}."}.
{tls_config, "{certfile, \"priv/ssl/fake_server.pem\"}, starttls, {ciphers, \"DHE-RSA-AES256-SHA\"},"}.
{secondary_c2s, ""}.
4 changes: 4 additions & 0 deletions test/ejabberd_tests/test.config
Original file line number Diff line number Diff line change
Expand Up @@ -31,6 +31,10 @@
{domain, <<"localhost">>},
{vars, "node2_vars.config"},
{cluster, mim}]},
{mim3, [{node, mongooseim3@localhost},
{domain, <<"localhost">>},
{vars, "node3_vars.config"},
{cluster, mim}]},
{fed, [{node, fed1@localhost},
{domain, <<"fed1">>},
{vars, "fed1_vars.config"},
Expand Down
Loading

0 comments on commit 3b89d5d

Please sign in to comment.