Perform async task status checks and termination using the same session

TODO: Logging and debugging for the remote spawner's is_alive checks. TODO: Some additional lint fixes by Cleber. TODO: This will be squashed soon. Signed-off-by: Plamen Dimitrov <plamen.dimitrov@intra2net.com> Signed-off-by: Cleber Rosa <crosa@redhat.com>
avocado-framework · Oct 16, 2023 · 600b8c1 · 600b8c1
1 parent 5c28e95
commit 600b8c1
Showing 1 changed file with 38 additions and 11 deletions.
diff --git a/avocado/plugins/spawners/remote.py b/avocado/plugins/spawners/remote.py
@@ -5,13 +5,13 @@
 import os
 import shlex
 
-from aexpect import remote
+from aexpect import exceptions, remote
 
 from avocado.core.plugin_interfaces import Init, Spawner
 from avocado.core.settings import settings
 from avocado.core.spawners.common import SpawnerMixin, SpawnMethod
 
-LOG = logging.getLogger(__name__)
+LOG = logging.getLogger("avocado.job." + __name__)
 
 
 class RemoteSpawnerException(Exception):
@@ -78,7 +78,7 @@ async def run_remote_cmd_async(session, command, timeout):
         except exceptions.ShellTimeoutError:
             status, output = 2, f"Remote command timeout of {timeout} reached"
         except exceptions.ShellProcessTerminatedError:
-            status, output = 2, f"Remote command terminated prematurely"
+            status, output = 2, "Remote command terminated prematurely"
         return status, output
 
     @contextlib.contextmanager
@@ -100,7 +100,7 @@ def reserve_slot(self, runtime_task):
             for session_slot in self.config.get("spawner.remote.slots"):
                 if not session_slot:
                     continue
-                with open(session_slot, "r") as f:
+                with open(session_slot, "r", encoding="utf-8") as f:
                     session_data = json.load(f)
                 session = remote.remote_login(**session_data)
                 RemoteSpawner.slots_cache[session] = False
@@ -132,8 +132,25 @@ def is_task_alive(runtime_task):
 
         session = runtime_task.spawner_handle
 
-        status, _ = session.cmd_status_output("pgrep -r R,S -f task-run")
-        return status == 0
+        # TODO: it seems we always call this once right at the end of each
+        # test (using other methods to detect running test) and when doing
+        # this always end up with "" output and timeout error, slowing the
+        # run down and leading to task not being alive without fatal problems
+        return False
+        # TODO: create second session to properly check this? why is the first
+        # one always ending with empty output?
+        # Until this gets solved, lint needs to be silenced:
+        # pylint: disable=W0101
+        try:
+            out = session.read_up_to_prompt(timeout=1.0)
+            LOG.critical(f"Alive output: {out} with task {runtime_task}")
+            return True
+        except exceptions.ExpectTimeoutError as error:
+            LOG.critical(f"Alive error: {error} with task {runtime_task}")
+            return False
+        # TODO: consider a secondary session as an alternative?
+        # status, _ = session.cmd_status_output("pgrep -r R,S -f task-run")
+        # return status == 0
 
     @with_slot_reservation
     async def spawn_task(self, runtime_task):
@@ -157,7 +174,9 @@ async def spawn_task(self, runtime_task):
         setup_hook = self.config.get("spawner.remote.setup_hook")
         # Customize and deploy test data to the container
         if setup_hook:
-            status, output = await RemoteSpawner.run_remote_cmd_async(session, setup_hook)
+            status, output = await RemoteSpawner.run_remote_cmd_async(
+                session, setup_hook
+            )
             LOG.debug(f"Customization command exited with code {status}")
             if status != 0:
                 LOG.error(
@@ -176,7 +195,7 @@ async def spawn_task(self, runtime_task):
             LOG.error(
                 f"Error exit code {status} on {session.host}:{session.port} "
                 f"with output:\n{output}"
-                )
+            )
             return False
 
         return True
@@ -196,10 +215,18 @@ async def wait_task(self, runtime_task):
 
     async def terminate_task(self, runtime_task):
         session = runtime_task.spawner_handle
-        status, _ = session.cmd_status_output("pkill -f task-run")
-        if status != 0:
-            LOG.error("Failed to terminate task on {host}")
+        session.sendcontrol("c")
+        try:
+            session.read_up_to_prompt()
+            return True
+        except exceptions.ExpectTimeoutError:
+            LOG.error("Failed to terminate task on {session.host}")
             return False
+        # TODO: consider a secondary session as an alternative?
+        # status, _ = session.cmd_status_output("pkill -f task-run")
+        # if status != 0:
+        #    LOG.error("Failed to terminate task on {session.host}")
+        #    return False
 
     @staticmethod
     async def check_task_requirements(runtime_task):