Working GPU hotloading, auto unloads when all outputs are disconnected

This has been implemented specifically for the use case of Nvidia Optimus laptops with display ports hardwired to the dGPU. I'm hoping it may prove useful for other use cases as well, such as hotplugging external GPUs. The implementation is not pretty, and is likely full of bugs and special cases. I am not familiar with the internal architecture of wlroots, so I would love any constructive criticism of the implementation / C style.

The status quo:
- With both the i915 and nouveau drivers loaded, sway can use both the laptop monitor AND external monitors, enabling and disabling outputs as needed.
- BUT the `nouveau` driver always stays loaded, and so my laptop idles at 20W instead of 3-5W :(
- Unloading `nouveau` requires restarting sway. This is annoying.

After this patch:
- Run `sudo modprobe nouveau` while sway is running.
- wlroots will detect the udev event, create a new DRM backend, and then add it to the multi backend.
- Unplug all outputs from the dGPU, and sway will destroy the dGPU DRM backend. You can verify that the nouveau driver is no longer in use by running `lsof /dev/dri/card*`.
- The user must still manually run `sudo rmmod nouveau` and then also use `bbswitch` to fully power off the dGPU and achieve the power-saving gains.

Though still somewhat manual, this whole process can be done WITHOUT shutting down sway, which is awesome for productivity (no longer having to restart sway several times throughout the day).

The future (perhaps a separate script):
- Automatically load `nouveau` when a cable is connected (unsure if this is possible due to the hardwiring of the HDMI port, see below).
- Automatically unload `nouveau` and run bbswitch after all monitors have been disconnected for a while.
- This would be BLISS - basically as automatic as Windows when it comes to using an Nvidia Optimus configuration - except of course that sway won't actually *render* from the dGPU.
For my workloads, this isn't a problem, but perhaps someone will take this further and reimplement something like bumblebee, but for Wayland. Is that even a good idea? I don't know.

Note: at the moment, this driver unloading is VERY eager, i.e. if you run `sudo modprobe nouveau` with no external monitors connected, sway will create the new backend, then immediately destroy it again because all outputs are disabled. In practice, this is probably not the intended behaviour. Perhaps there should be an explicit command, e.g. "unload all unused drm backends", that can be called by some other script to trigger unloading. Or perhaps, if a DRM backend is unused for a given amount of time (e.g. a few minutes), it should get unloaded.

Also note that, at least on my laptop, there is seemingly no way to tell when a cable is plugged into the HDMI port (probably because it is hardwired to the Nvidia GPU). If this were possible, you could write a script that automatically powers on the Nvidia GPU when an external monitor is connected. Currently, you need to do this manually with `sudo modprobe nouveau`.
neon64 · Oct 7, 2020 · 76268d4 · 76268d4
1 parent dc32ece
commit 76268d4
Show file tree

Hide file tree

Showing 8 changed files with 72 additions and 20 deletions.
diff --git a/backend/backend.c b/backend/backend.c
@@ -20,6 +20,7 @@
 
 #if WLR_HAS_X11_BACKEND
 #include <wlr/backend/x11.h>
+#include <include/backend/drm/drm.h>
 #endif
 
 void wlr_backend_init(struct wlr_backend *backend,

diff --git a/backend/drm/backend.c b/backend/drm/backend.c
@@ -15,6 +15,7 @@
 #include "backend/drm/drm.h"
 #include "util/signal.h"
 #include <backtrace.h>
+#include <include/wlr/backend/multi.h>
 
 struct wlr_drm_backend *get_drm_backend_from_backend(
 		struct wlr_backend *wlr_backend) {
@@ -57,8 +58,6 @@ static void backend_destroy(struct wlr_backend *backend) {
 
 	wlr_log(WLR_INFO, "Destroying DRM BACKEND!!!");
 
-	print_trace();
-
 	struct wlr_drm_backend *drm = get_drm_backend_from_backend(backend);
 
 	restore_drm_outputs(drm);
@@ -74,6 +73,7 @@ static void backend_destroy(struct wlr_backend *backend) {
 	wl_list_remove(&drm->session_destroy.link);
 	wl_list_remove(&drm->session_signal.link);
 	wl_list_remove(&drm->drm_invalidated.link);
+	wl_list_remove(&drm->add_gpu_signal.link);
 
 	finish_drm_resources(drm);
 	finish_drm_renderer(&drm->renderer);
@@ -109,6 +109,35 @@ bool wlr_backend_is_drm(struct wlr_backend *b) {
 	return b->impl == &backend_impl;
 }
 
+/* Listener for session->events.add_gpu: creates a DRM backend for the hot-added
+ * GPU fd (this backend is passed as parent) and adds it to drm->multi. */
+static void handle_add_gpu(struct wl_listener *listener, void *data) {
+	struct wlr_drm_backend *drm =
+		wl_container_of(listener, drm, add_gpu_signal);
+	struct wlr_event_add_gpu *event = data;
+
+	// drm->multi stays NULL until wlr_multi_backend_add() has seen this
+	// backend; without it there is nowhere to attach the new GPU.
+	if (drm->multi == NULL) {
+		wlr_log(WLR_ERROR, "No multi backend for DRM device %d", event->gpu_fd);
+		return;
+	}
+	// NOTE(review): every DRM backend registers this listener, so children
+	// created here will also react to later add_gpu events -- confirm.
+	struct wlr_backend *child_drm = wlr_drm_backend_create(drm->display,
+		drm->session, event->gpu_fd, &drm->backend, NULL);
+	if (!child_drm) {
+		wlr_log(WLR_ERROR, "Failed to open DRM device %d", event->gpu_fd);
+		return;
+	}
+	if (!wlr_multi_backend_add(&drm->multi->backend, child_drm)) {
+		wlr_log(WLR_ERROR, "Failed to add DRM device %d", event->gpu_fd);
+		wlr_backend_destroy(child_drm);
+		return;
+	}
+	wlr_log(WLR_INFO, "Added DRM device %d to multi backend", event->gpu_fd);
+}
+
 static void session_signal(struct wl_listener *listener, void *data) {
 	struct wlr_drm_backend *drm =
 		wl_container_of(listener, drm, session_signal);
@@ -215,6 +244,7 @@ struct wlr_backend *wlr_drm_backend_create(struct wl_display *display,
 	}
 	wlr_backend_init(&drm->backend, &backend_impl);
 
+	drm->multi = NULL;
 	drm->session = session;
 	wl_list_init(&drm->outputs);
 
@@ -239,6 +269,9 @@ struct wlr_backend *wlr_drm_backend_create(struct wl_display *display,
 	drm->session_signal.notify = session_signal;
 	wl_signal_add(&session->session_signal, &drm->session_signal);
 
+	drm->add_gpu_signal.notify = handle_add_gpu;
+	wl_signal_add(&session->events.add_gpu, &drm->add_gpu_signal);
+
 	if (!check_drm_features(drm)) {
 		goto error_event;
 	}

diff --git a/backend/drm/drm.c b/backend/drm/drm.c
@@ -1012,7 +1012,6 @@ bool drm_connector_is_cursor_visible(struct wlr_drm_connector *conn) {
 }
 
 static void drm_connector_destroy(struct wlr_output *output) {
-	wlr_log(WLR_INFO, "ENTERING drm_connector_destroy!!");
 	struct wlr_drm_connector *conn = get_drm_connector_from_output(output);
 	drm_connector_cleanup(conn);
 	drmModeFreeCrtc(conn->old_crtc);

diff --git a/backend/multi/backend.c b/backend/multi/backend.c
@@ -6,6 +6,7 @@
 #include <wlr/backend/interface.h>
 #include <wlr/backend/session.h>
 #include <wlr/util/log.h>
+#include <include/backend/drm/drm.h>
 #include "backend/multi.h"
 #include "util/signal.h"
 
@@ -201,6 +202,15 @@ bool wlr_multi_backend_add(struct wlr_backend *_multi,
 	sub->new_output.notify = new_output_reemit;
 
 	wlr_signal_emit_safe(&multi->events.backend_add, backend);
+
+	// TODO: get rid of this hack
+	// drm backends now keep track of their parent multi backend
+	// for GPU hotplugging purposes
+	if(wlr_backend_is_drm(backend)) {
+		struct wlr_drm_backend *drm_backend = get_drm_backend_from_backend(backend);
+		drm_backend->multi = multi;
+	}
+
 	return true;
 }
 

diff --git a/backend/session/session.c b/backend/session/session.c
@@ -98,24 +98,21 @@ static int udev_event(int fd, uint32_t mask, void *data) {
 	if (!found && strcmp(action, "add") == 0
 			&& strcmp(udev_device_get_subsystem(udev_dev), "drm") == 0
 			&& is_card(udev_device_get_sysname(udev_dev))) {
-		wlr_log(WLR_INFO, "wlroots detected a fresh drm device, trying to add to backend");
 
-		int gpu_fd = session_try_add_gpu(session, udev_dev);
+		int gpu_fd = session_try_open_gpu(session, udev_dev);
 
 		if(gpu_fd >= 0) {
-			wlr_log(WLR_INFO, "got GPU!");
-//			struct wlr_backend *drm = wlr_drm_backend_create(display, session,
-//			gpus[i], primary_drm, create_renderer_func);
-//			if (!drm) {
-//		wlr_log(WLR_ERROR, "Failed to open DRM device %d", gpus[i]);
-//		continue;
-//	}
-//
-//	if (!primary_drm) {
-//		primary_drm = drm;
-//	}
-//
-//	wlr_multi_backend_add(backend, drm);
+
+			struct wlr_event_add_gpu *event = malloc(sizeof(*event));
+			event->session = session;
+			event->gpu_fd = gpu_fd;
+
+			wlr_log(WLR_DEBUG, "sending add GPU signal with fd = %d", gpu_fd);
+
+			// this is the same signal as a VT switch...
+			wlr_signal_emit_safe(&session->events.add_gpu, event);
+
+			free(event);
 		}
 	}
 
@@ -133,6 +130,7 @@ static void handle_display_destroy(struct wl_listener *listener, void *data) {
 void session_init(struct wlr_session *session) {
 	wl_signal_init(&session->session_signal);
 	wl_signal_init(&session->events.destroy);
+	wl_signal_init(&session->events.add_gpu);
 	wl_list_init(&session->devices);
 }
 
@@ -429,7 +427,7 @@ size_t wlr_session_find_gpus(struct wlr_session *session,
 	return i;
 }
 
-int session_try_add_gpu(struct wlr_session *session, struct udev_device *udev_dev) {
+int session_try_open_gpu(struct wlr_session *session, struct udev_device *udev_dev) {
 	bool is_boot_vga = false;
 
 	const char *seat = udev_device_get_property_value(udev_dev, "ID_SEAT");

diff --git a/include/backend/drm/drm.h b/include/backend/drm/drm.h
@@ -13,6 +13,7 @@
 #include <wlr/backend/session.h>
 #include <wlr/render/drm_format_set.h>
 #include <wlr/render/egl.h>
+#include <backend/multi.h>
 #include <xf86drmMode.h>
 #include "iface.h"
 #include "properties.h"
@@ -93,11 +94,15 @@ struct wlr_drm_backend {
 	struct wl_listener session_destroy;
 	struct wl_listener session_signal;
 	struct wl_listener drm_invalidated;
+	struct wl_listener add_gpu_signal;
 
 	struct wl_list outputs;
 
 	struct wlr_drm_renderer renderer;
 	struct wlr_session *session;
+
+	// TODO: is this needed?
+	struct wlr_multi_backend *multi;
 };
 
 enum wlr_drm_connector_state {

diff --git a/include/backend/session/session.h b/include/backend/session/session.h
@@ -4,6 +4,6 @@
 struct wlr_session;
 
 void session_init(struct wlr_session *session);
-int session_try_add_gpu(struct wlr_session *session, struct udev_device *udev_dev);
+int session_try_open_gpu(struct wlr_session *session, struct udev_device *udev_dev);
 
 #endif
diff --git a/include/wlr/backend/session.h b/include/wlr/backend/session.h
@@ -42,9 +42,15 @@ struct wlr_session {
 
 	struct {
 		struct wl_signal destroy;
+		struct wl_signal add_gpu;
 	} events;
 };
 
+struct wlr_event_add_gpu { // payload of wlr_session.events.add_gpu
+	struct wlr_session* session; // session whose udev monitor saw the new device
+	int gpu_fd; // fd returned by session_try_open_gpu() for that device
+}; // the emitter frees this right after emission; listeners must not keep it
+
 /*
  * Opens a session, taking control of the current virtual terminal.
  * This should not be called if another program is already in control