jina-ai · JoanFM · Jan 14, 2023 · Jan 6, 2023 · Jan 6, 2023 · Jan 6, 2023
diff --git a/jina/serve/networking.py b/jina/serve/networking.py
@@ -1,6 +1,8 @@
 import asyncio
 import ipaddress
 import os
+import threading
+import time
 from collections import defaultdict
 from dataclasses import dataclass
 from typing import Dict, List, Optional, Sequence, Set, Tuple, Union
@@ -613,9 +615,6 @@ def _get_connection_list(
                     return self._get_connection_list(
                         deployment, type_, 0, increase_access_count
                     )
-                self._logger.debug(
-                    f'did not find a connection for deployment {deployment}, type {type_} and entity_id {entity_id}. There are {len(self._deployments[deployment][type_]) if deployment in self._deployments else 0} available connections for this deployment and type. '
-                )
                 return None
 
         def _add_deployment(self, deployment: str):
@@ -1114,6 +1113,71 @@ async def task_wrapper():
 
         return asyncio.create_task(task_wrapper())
 
+    async def warmup(
+        self,
+        deployment: str,
+        stop_event: threading.Event,
+    ):
+        '''Executes JinaInfoRPC against the provided deployment. A single task is created for each replica connection.
+        Targets that did not answer are retried every 0.2s until none is left, `stop_event` is set or 5 minutes have passed.
+        :param deployment: deployment name and the replicas that need to be warmed up.
+        :param stop_event: signal to indicate if an early termination of the task is required for graceful teardown.
+        '''
+
+        async def task_wrapper(target_warmup_responses, target, channel):
+            try:
+                stub = jina_pb2_grpc.JinaInfoRPCStub(channel=channel)
+                await stub._status(
+                    request=jina_pb2.google_dot_protobuf_dot_empty__pb2.Empty(),
+                )
+                target_warmup_responses[target] = True
+            except Exception:
+                target_warmup_responses[target] = False
+
+        try:
+            timeout = time.time() + 60 * 5  # 5 minutes from now
+            warmed_up_targets = set()
+
+            while not stop_event.is_set():
+                # refresh channels in case connection has been reset due to InternalNetworkError
+                target_to_channel = self.__extract_target_to_channel(deployment)
+                for warmed_target in warmed_up_targets:
+                    # a warmed target can be absent after the refresh; a plain pop() would raise KeyError and abort the warmup
+                    target_to_channel.pop(warmed_target, None)
+
+                replica_warmup_responses = {}
+                tasks = [
+                    asyncio.create_task(
+                        task_wrapper(replica_warmup_responses, target, channel)
+                    )
+                    for target, channel in target_to_channel.items()
+                ]
+                await asyncio.gather(*tasks, return_exceptions=True)
+
+                warmed_up_targets.update(
+                    target for target, ok in replica_warmup_responses.items() if ok
+                )
+
+                if time.time() > timeout or len(target_to_channel) == 0:
+                    return
+
+                await asyncio.sleep(0.2)
+        except Exception as ex:
+            self._logger.error(f'error with warmup up task: {ex}')
+            return
+
+    def __extract_target_to_channel(self, deployment):
+        '''Merge the address-to-channel mappings of the shard replica lists and the head replica list of a deployment.
+        :param deployment: name of the deployment to look up in the connection pool.
+        :return: dict built from `_address_to_channel` of every truthy replica list; falsy entries are skipped.
+        '''
+        shard_replicas = self._connections.get_replicas_all_shards(deployment)
+        head_replicas = self._connections.get_replicas(deployment=deployment, head=True)
+        address_to_channel = {}
+        for replicas in filter(None, {*shard_replicas, head_replicas}):
+            address_to_channel.update(replicas._address_to_channel)
+        return address_to_channel
+
     @staticmethod
     def __aio_channel_with_tracing_interceptor(
         address,

diff --git a/jina/serve/runtimes/asyncio.py b/jina/serve/runtimes/asyncio.py
@@ -1,6 +1,7 @@
 import argparse
 import asyncio
 import signal
+import threading
 import time
 from abc import ABC, abstractmethod
 from typing import TYPE_CHECKING, Optional, Union
@@ -76,6 +77,8 @@ def _cancel(signum, frame):
         self._start_time = time.time()
         self._loop.run_until_complete(self.async_setup())
         self._send_telemetry_event()
+        self.warmup_task = None
+        self.warmup_stop_event = threading.Event()
 
     def _send_telemetry_event(self):
         send_telemetry_event(event='start', obj=self, entity_id=self._entity_id)
@@ -161,6 +164,20 @@ async def async_run_forever(self):
         """The async method to run until it is stopped."""
         ...
 
+    async def cancel_warmup_task(self):
+        '''Cancel warmup task if exists and is not completed. Cancellation is required if the Flow is being terminated before the
+        task is successful or hasn't reached the max timeout. Best effort: failures are logged at debug level, never raised.
+        '''
+        if self.warmup_task:
+            try:
+                if not self.warmup_task.done():
+                    self.logger.debug('Cancelling warmup task.')
+                    # cooperative stop: set the event, then await the task so it can finish on its own
+                    self.warmup_stop_event.set()
+                    await self.warmup_task
+            except (Exception, asyncio.CancelledError) as ex:
+                self.logger.debug(f'warmup task did not stop cleanly: {ex!r}')
+
     # Static methods used by the Pod to communicate with the `Runtime` in the separate process
 
     @staticmethod

diff --git a/jina/serve/runtimes/gateway/__init__.py b/jina/serve/runtimes/gateway/__init__.py
@@ -131,17 +131,22 @@ async def _wait_for_cancel(self):
 
     async def async_teardown(self):
         """Shutdown the server."""
+        await self.cancel_warmup_task()
         await self.gateway.streamer.close()
         await self.gateway.shutdown()
         await self.async_cancel()
 
     async def async_cancel(self):
         """Stop the server."""
+        await self.cancel_warmup_task()
         await self.gateway.streamer.close()
         await self.gateway.shutdown()
 
     async def async_run_forever(self):
         """Running method of the server."""
+        self.warmup_task = asyncio.create_task(
+            self.gateway.streamer.warmup(self.warmup_stop_event)
+        )
         await self.gateway.run_server()
         self.is_cancel.set()
 

diff --git a/jina/serve/runtimes/gateway/composite/gateway.py b/jina/serve/runtimes/gateway/composite/gateway.py
@@ -1,3 +1,4 @@
+import asyncio
 import copy
 from typing import Any, List, Optional
 
@@ -36,18 +37,27 @@ async def setup_server(self):
         """
         setup GRPC server
         """
+        tasks = []
         for gateway in self.gateways:
-            await gateway.setup_server()
+            tasks.append(asyncio.create_task(gateway.setup_server()))
+
+        await asyncio.gather(*tasks)
 
     async def shutdown(self):
         """Free other resources allocated with the server, e.g, gateway object, ..."""
+        shutdown_tasks = []
         for gateway in self.gateways:
-            await gateway.shutdown()
+            shutdown_tasks.append(asyncio.create_task(gateway.shutdown()))
+
+        await asyncio.gather(*shutdown_tasks)
 
     async def run_server(self):
         """Run GRPC server forever"""
+        run_server_tasks = []
         for gateway in self.gateways:
-            await gateway.run_server()
+            run_server_tasks.append(asyncio.create_task(gateway.run_server()))
+
+        await asyncio.gather(*run_server_tasks)
 
     @staticmethod
     def _deepcopy_with_ignore_attrs(obj: Any, ignore_attrs: List[str]) -> Any:

diff --git a/jina/serve/runtimes/head/__init__.py b/jina/serve/runtimes/head/__init__.py
@@ -1,4 +1,5 @@
 import argparse
+import asyncio
 import json
 import os
 from abc import ABC
@@ -158,24 +159,35 @@ async def async_setup(self):
                 service, health_pb2.HealthCheckResponse.SERVING
             )
         reflection.enable_server_reflection(service_names, self._grpc_server)
-     
+
         bind_addr = f'{self.args.host}:{self.args.port}'
         self._grpc_server.add_insecure_port(bind_addr)
         self.logger.debug(f'start listening on {bind_addr}')
         await self._grpc_server.start()
 
+    def _warmup(self):
+        """Schedule the request handler warmup as an asyncio task and keep a handle to it in `self.warmup_task`."""
+        warmup_coro = self.request_handler.warmup(
+            connection_pool=self.connection_pool,
+            stop_event=self.warmup_stop_event,
+            deployment=self._deployment_name,
+        )
+        self.warmup_task = asyncio.create_task(warmup_coro)
+
     async def async_run_forever(self):
         """Block until the GRPC server is terminated"""
+        self._warmup()
         await self._grpc_server.wait_for_termination()
 
     async def async_cancel(self):
         """Stop the GRPC server"""
         self.logger.debug('cancel HeadRuntime')
-
+        await self.cancel_warmup_task()
         await self._grpc_server.stop(0)
 
     async def async_teardown(self):
         """Close the connection pool"""
+        await self.cancel_warmup_task()
         await self._health_servicer.enter_graceful_shutdown()
         await self.async_cancel()
         await self.connection_pool.close()
@@ -294,6 +306,7 @@ async def _status(self, empty, context) -> jina_pb2.JinaInfoProto:
         :param context: grpc context
         :returns: the response request
         """
+        self.logger.debug('recv _status request')
         infoProto = jina_pb2.JinaInfoProto()
         version, env_info = get_full_version()
         for k, v in version.items():

diff --git a/jina/serve/runtimes/head/request_handling.py b/jina/serve/runtimes/head/request_handling.py
@@ -1,6 +1,9 @@
 import asyncio
-from typing import TYPE_CHECKING, Dict, Optional, Tuple
+import threading
+import time
+from typing import TYPE_CHECKING, Dict, List, Optional, Tuple
 
+from jina.serve.networking import GrpcConnectionPool
 from jina.serve.runtimes.monitoring import MonitoringRequestMixin
 from jina.serve.runtimes.worker.request_handling import WorkerRequestHandler
 
@@ -164,7 +167,9 @@ async def _handle_data_request(
         elif len(worker_results) > 1 and not reduce:
             # worker returned multiple responses, but the head is configured to skip reduction
             # just concatenate the docs in this case
-            response_request.data.docs = WorkerRequestHandler.get_docs_from_request(requests)
+            response_request.data.docs = WorkerRequestHandler.get_docs_from_request(
+                requests
+            )
 
         merged_metadata = self._merge_metadata(
             metadata,
@@ -177,3 +182,24 @@ async def _handle_data_request(
         self._update_end_request_metrics(response_request)
 
         return response_request, merged_metadata
+
+    async def warmup(
+        self,
+        connection_pool: GrpcConnectionPool,
+        stop_event: threading.Event,
+        deployment: str,
+    ):
+        '''Runs the connection pool warmup for the given deployment and waits for it to finish.
+        Any exception raised while doing so is logged as an error and not propagated.
+        :param connection_pool: GrpcConnectionPool that implements the warmup to the connected deployments.
+        :param stop_event: signal to indicate if an early termination of the task is required for graceful teardown.
+        :param deployment: name of the deployment that needs to be warmed up.
+        '''
+        self.logger.debug('Running HeadRuntime warmup')
+        try:
+            pool_warmup = connection_pool.warmup(
+                deployment=deployment, stop_event=stop_event
+            )
+            await asyncio.create_task(pool_warmup)
+        except Exception as ex:
+            self.logger.error(f'error with HeadRuntime warmup up task: {ex}')
diff --git a/jina/serve/runtimes/worker/__init__.py b/jina/serve/runtimes/worker/__init__.py
@@ -142,7 +142,7 @@ async def _async_setup_grpc_server(self):
             self._health_servicer, self._grpc_server
         )
 
-        reflection.enable_server_reflection(service_names, self._grpc_server)     
+        reflection.enable_server_reflection(service_names, self._grpc_server)
         bind_addr = f'{self.args.host}:{self.args.port}'
         self.logger.debug(f'start listening on {bind_addr}')
         self._grpc_server.add_insecure_port(bind_addr)
@@ -306,6 +306,7 @@ async def _status(self, empty, context) -> jina_pb2.JinaInfoProto:
         :param context: grpc context
         :returns: the response request
         """
+        self.logger.debug('recv _status request')
         info_proto = jina_pb2.JinaInfoProto()
         version, env_info = get_full_version()
         for k, v in version.items():

diff --git a/jina/serve/runtimes/worker/request_handling.py b/jina/serve/runtimes/worker/request_handling.py
@@ -538,9 +538,7 @@ def get_docs_from_request(
         """
         if len(requests) > 1:
             result = DocumentArray(
-                    d
-                    for r in reversed(requests)
-                    for d in getattr(r, 'docs')
+                d for r in reversed(requests) for d in getattr(r, 'docs')
             )
         else:
             result = getattr(requests[0], 'docs')

diff --git a/jina/serve/streamer.py b/jina/serve/streamer.py
@@ -1,8 +1,11 @@
+import asyncio
 import json
 import os
+import threading
+import time
 from typing import TYPE_CHECKING, Dict, List, Optional, Sequence, Union
 
-from docarray import DocumentArray
+from docarray import Document, DocumentArray
 
 from jina.logging.logger import JinaLogger
 from jina.serve.networking import GrpcConnectionPool
@@ -65,6 +68,7 @@ def __init__(
         :param aio_tracing_client_interceptors: Optional list of aio grpc tracing server interceptors.
         :param tracing_client_interceptor: Optional gprc tracing server interceptor.
         """
+        self.logger = logger
         topology_graph = TopologyGraph(
             graph_representation=graph_representation,
             graph_conditions=graph_conditions,
@@ -78,6 +82,7 @@ def __init__(
         self.runtime_name = runtime_name
         self.aio_tracing_client_interceptors = aio_tracing_client_interceptors
         self.tracing_client_interceptor = tracing_client_interceptor
+        self._executor_addresses = executor_addresses
 
         self._connection_pool = self._create_connection_pool(
             executor_addresses,
@@ -221,3 +226,28 @@ def get_streamer():
     @staticmethod
     def _set_env_streamer_args(**kwargs):
         os.environ['JINA_STREAMER_ARGS'] = json.dumps(kwargs)
+
+    async def warmup(self, stop_event: threading.Event):
+        '''Executes the connection pool warmup for every deployment in the executor addresses, one task per deployment.
+        This forces the gateway to establish connection and open a gRPC channel to each executor so that the
+        first request doesn't need to experience the penalty of establishing a brand new gRPC channel.
+        Exceptions from individual deployment warmups are returned by `gather` rather than raised.
+        :param stop_event: signal to indicate if an early termination of the task is required for graceful teardown.
+        '''
+        self.logger.debug('Running GatewayRuntime warmup')
+        deployment_names = set(self._executor_addresses.keys())
+
+        try:
+            # one task per deployment so that the deployments are warmed up concurrently
+            pending_warmups = [
+                asyncio.create_task(
+                    self._connection_pool.warmup(
+                        deployment=name, stop_event=stop_event
+                    )
+                )
+                for name in deployment_names
+            ]
+            # return_exceptions=True: task failures come back as results instead of being raised here
+            await asyncio.gather(*pending_warmups, return_exceptions=True)
+        except Exception as ex:
+            self.logger.error(f'error with GatewayRuntime warmup up task: {ex}')