-
Notifications
You must be signed in to change notification settings - Fork 3.9k
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
rabbit: Run plugins' boot steps during rabbit start/2 #2656
rabbit: Run plugins' boot steps during rabbit start/2 #2656
Conversation
So what good does this make if a plugin boot step cannot rely on any of its dependencies? |
That's the old behavior (the one still in 3.7.x), so if plugin works with it, they should work with this patch. The new behavior fixed that (dependencies were loaded/had their boot steps exeucted/started before a plugin was taken care of), but they are incompatible, so we have to choose between the two. |
I'm going to test to see if the changes in this PR resolve the issue reported by @noxdafox https://groups.google.com/g/rabbitmq-users/c/m7jgGmIk4oY/m/4tmLsVM0AwAJ |
c293ae3
to
c45d305
Compare
Thanks @lukebakken for pinging me. Please correct me if this is not the right place for this conversation. From a Plugin development POW I would expect to be able to hook into the broker bootstrap procedure to be able to configure its internals based on my needs:
Then, I would wait for my Erlang application (if any) to be started at last. As @dumbbell said, my app depends on It is valuable to have this pre and post bootstrapping phases as it gives me more control on how to properly setup my Plugin around the broker. |
Associated PRs:
|
c45d305
to
90fa013
Compare
Hm, the error reported in gotthardp/rabbitmq-email#42 happens with this PR (#2656) as well as the current
The reason being that on plugin init it tries to start an AMQP direct connection which fails with that error. I'm going to continue investigating (cc @dumbbell @michaelklishin). |
@lukebakken please be wary that the last version of the plugin already addresses this issue. Therefore to reproduce it, you should use a less recent version. Please let me know if you encounter issues while testing. |
@noxdafox thank you for the heads up! I'm sure I would have missed that. |
Here is a patch adding a diff --git a/deps/rabbit/apps/rabbitmq_prelaunch/src/rabbit_boot_state.erl b/deps/rabbit/apps/rabbitmq_prelaunch/src/rabbit_boot_state.erl
index c76824e7be..12a0e1f192 100644
--- a/deps/rabbit/apps/rabbitmq_prelaunch/src/rabbit_boot_state.erl
+++ b/deps/rabbit/apps/rabbitmq_prelaunch/src/rabbit_boot_state.erl
@@ -12,11 +12,13 @@
-export([get/0,
set/1,
- wait_for/2]).
+ wait_for/2,
+ has_reached/1,
+ has_reached_and_is_active/1]).
-define(PT_KEY_BOOT_STATE, {?MODULE, boot_state}).
--type boot_state() :: 'stopped' | 'booting' | 'ready' | 'stopping'.
+-type boot_state() :: 'stopped' | 'booting' | 'core_started' | 'ready' | 'stopping'.
-export_type([boot_state/0]).
@@ -36,7 +38,7 @@ set(BootState) ->
-spec wait_for(boot_state(), timeout()) -> ok | {error, timeout}.
wait_for(BootState, infinity) ->
- case is_reached(BootState) of
+ case has_reached(BootState) of
true -> ok;
false -> Wait = 200,
timer:sleep(Wait),
@@ -44,7 +46,7 @@ wait_for(BootState, infinity) ->
end;
wait_for(BootState, Timeout)
when is_integer(Timeout) andalso Timeout >= 0 ->
- case is_reached(BootState) of
+ case has_reached(BootState) of
true -> ok;
false -> Wait = 200,
timer:sleep(Wait),
@@ -53,24 +55,35 @@ wait_for(BootState, Timeout)
wait_for(_, _) ->
{error, timeout}.
-boot_state_idx(stopped) -> 0;
-boot_state_idx(booting) -> 1;
-boot_state_idx(ready) -> 2;
-boot_state_idx(stopping) -> 3.
+boot_state_idx(stopped) -> 0;
+boot_state_idx(booting) -> 1;
+boot_state_idx(core_started) -> 2;
+boot_state_idx(ready) -> 3;
+boot_state_idx(stopping) -> 4.
is_valid(BootState) ->
is_integer(boot_state_idx(BootState)).
-is_reached(TargetBootState) ->
- is_reached(?MODULE:get(), TargetBootState).
+has_reached(TargetBootState) ->
+ has_reached(?MODULE:get(), TargetBootState).
-is_reached(CurrentBootState, CurrentBootState) ->
+has_reached(CurrentBootState, CurrentBootState) ->
true;
-is_reached(stopping, stopped) ->
+has_reached(stopping, stopped) ->
false;
-is_reached(_CurrentBootState, stopped) ->
+has_reached(_CurrentBootState, stopped) ->
true;
-is_reached(stopped, _TargetBootState) ->
+has_reached(stopped, _TargetBootState) ->
true;
-is_reached(CurrentBootState, TargetBootState) ->
+has_reached(CurrentBootState, TargetBootState) ->
boot_state_idx(TargetBootState) =< boot_state_idx(CurrentBootState).
+
+has_reached_and_is_active(TargetBootState) ->
+ case ?MODULE:get() of
+ stopped ->
+ false;
+ CurrentBootState ->
+ has_reached(CurrentBootState, TargetBootState)
+ andalso
+ not has_reached(CurrentBootState, stopping)
+ end.
diff --git a/deps/rabbit/src/rabbit.erl b/deps/rabbit/src/rabbit.erl
index 3ccfbe0e3a..80c114e40d 100644
--- a/deps/rabbit/src/rabbit.erl
+++ b/deps/rabbit/src/rabbit.erl
@@ -884,7 +884,7 @@ start(normal, []) ->
%% We can load all plugins and refresh their feature flags at
%% once, because it does not involve running code from the
%% plugins.
- app_utils:load_applications(Plugins),
+ ok = app_utils:load_applications(Plugins),
ok = rabbit_feature_flags:refresh_feature_flags_after_app_load(
Plugins),
@@ -893,6 +893,7 @@ start(normal, []) ->
ok = rabbit_boot_steps:run_boot_steps([rabbit | Plugins]),
run_postlaunch_phase(Plugins),
+ rabbit_boot_state:set(core_started),
{ok, SupPid}
catch
throw:{error, _} = Error ->
diff --git a/deps/rabbit/src/rabbit_direct.erl b/deps/rabbit/src/rabbit_direct.erl
index 3fc2d75908..0506e1bf6c 100644
--- a/deps/rabbit/src/rabbit_direct.erl
+++ b/deps/rabbit/src/rabbit_direct.erl
@@ -75,7 +75,7 @@ auth_fun({Username, Password}, VHost, ExtraAuthProps) ->
connect(Creds, VHost, Protocol, Pid, Infos) ->
ExtraAuthProps = extract_extra_auth_props(Creds, VHost, Pid, Infos),
AuthFun = auth_fun(Creds, VHost, ExtraAuthProps),
- case rabbit:is_running() of
+ case rabbit_boot_state:has_reached_and_is_active(core_started) of
true ->
case whereis(rabbit_direct_client_sup) of
undefined -> What do you think? |
There are still some errors reported by CI. I will continue to study them tomorrow. |
3b6e594
to
8cb74a2
Compare
I could only reproduce one failure in rabbitmq_stomp and it went away after I updated my local copy of osiris. So I rebased the branch and force-pushed, just in case the previous builds were running with inconsistencies between RabbitMQ and osiris' master branch. |
This restores the behavior prior the commit making `rabbit` closer to a standard Erlang application. Plugins are still actually started after rabbit is started (because they depend on the `rabbit` application). Only the execution of their boot steps was moved earlier. With the behavior restored, it also means that a plugin's dependencies are not started yet when its boot steps are executed. V2: Move the maintenance mode reset before the plugin boot steps run. V3: Add a `core_started` boot state. That state is reached at the end of the `rabbit` app start function. It indicates when the RabbitMQ core is started but the full service is not yet ready. We now use this state in direct connection code to determine if clients can open a direct connection. We have to do that because some plugins open a direct connection as part of their own startup (i.e. they can't wait for the `ready` boot state which comes later).
8cb74a2
to
a0cd2e5
Compare
Thanks @dumbbell I'm going to wrap up testing this today. |
@dumbbell @noxdafox I have confirmed that this PR #2656 fixes the issue that the changes in noxdafox/rabbitmq-message-deduplication#62 address. Here's how I tested:
|
I can confirm that this does not negatively affect |
…rabbit-start rabbit: Run plugins' boot steps during rabbit start/2 (cherry picked from commit 67624cd)
Backported to |
@lukebakken I confirm your test approach to be correct. Thanks! |
This restores the behavior prior the commit making
rabbit
closer to a standard Erlang application.Plugins are still actually started after rabbit is started (because they depend on the
rabbit
application). Only the execution of their boot steps was moved earlier.With the behavior restored, it also means that a plugin's dependencies are not started yet when its boot steps are executed.