diff --git a/api/src/main/java/org/pmiops/workbench/api/ClusterController.java b/api/src/main/java/org/pmiops/workbench/api/ClusterController.java index 7eac4278356..f12739ad473 100644 --- a/api/src/main/java/org/pmiops/workbench/api/ClusterController.java +++ b/api/src/main/java/org/pmiops/workbench/api/ClusterController.java @@ -4,13 +4,11 @@ import com.google.gson.Gson; import java.net.MalformedURLException; import java.net.URL; -import java.sql.Timestamp; import java.time.Clock; import java.util.Base64; import java.util.HashMap; import java.util.List; import java.util.Map; -import java.util.Optional; import java.util.function.Function; import java.util.logging.Level; import java.util.logging.Logger; @@ -28,6 +26,7 @@ import org.pmiops.workbench.db.model.DbCdrVersion; import org.pmiops.workbench.db.model.DbUser; import org.pmiops.workbench.db.model.DbUser.ClusterConfig; +import org.pmiops.workbench.db.model.DbWorkspace; import org.pmiops.workbench.exceptions.BadRequestException; import org.pmiops.workbench.exceptions.NotFoundException; import org.pmiops.workbench.exceptions.ServerErrorException; @@ -38,14 +37,12 @@ import org.pmiops.workbench.model.ClusterLocalizeRequest; import org.pmiops.workbench.model.ClusterLocalizeResponse; import org.pmiops.workbench.model.ClusterStatus; -import org.pmiops.workbench.model.DefaultClusterResponse; import org.pmiops.workbench.model.EmptyResponse; import org.pmiops.workbench.model.ListClusterDeleteRequest; import org.pmiops.workbench.model.ListClusterResponse; import org.pmiops.workbench.model.UpdateClusterConfigRequest; import org.pmiops.workbench.model.WorkspaceAccessLevel; import org.pmiops.workbench.notebooks.LeonardoNotebooksClient; -import org.pmiops.workbench.notebooks.model.ClusterError; import org.pmiops.workbench.notebooks.model.StorageLink; import org.pmiops.workbench.workspaces.WorkspaceService; import org.springframework.beans.factory.annotation.Autowired; @@ -187,115 +184,99 @@ public ResponseEntity> deleteClustersInProject( return ResponseEntity.ok(clustersInProjectAffected); } + private DbWorkspace lookupWorkspace(String workspaceNamespace) throws NotFoundException { + return workspaceService + .getByNamespace(workspaceNamespace) + .orElseThrow(() -> new NotFoundException("Workspace not found: " + workspaceNamespace)); + } + @Override - public ResponseEntity listClusters( - String billingProjectId, String workspaceFirecloudName) { - if (billingProjectId == null) { - throw new BadRequestException("Must specify billing project"); - } + public ResponseEntity getCluster(String workspaceNamespace) { + String firecloudWorkspaceName = lookupWorkspace(workspaceNamespace).getFirecloudName(); workspaceService.enforceWorkspaceAccessLevel( - billingProjectId, workspaceFirecloudName, WorkspaceAccessLevel.READER); - workspaceService.validateActiveBilling(billingProjectId, workspaceFirecloudName); + workspaceNamespace, firecloudWorkspaceName, WorkspaceAccessLevel.WRITER); + workspaceService.validateActiveBilling(workspaceNamespace, firecloudWorkspaceName); - DbUser user = this.userProvider.get(); + org.pmiops.workbench.notebooks.model.Cluster firecloudCluster = + leonardoNotebooksClient.getCluster( + workspaceNamespace, clusterNameForUser(userProvider.get())); - String clusterName = clusterNameForUser(user); + return ResponseEntity.ok(TO_ALL_OF_US_CLUSTER.apply(firecloudCluster)); + } - org.pmiops.workbench.notebooks.model.Cluster fcCluster; - try { - fcCluster = this.leonardoNotebooksClient.getCluster(billingProjectId, clusterName); - } catch (NotFoundException e) { - fcCluster = - this.leonardoNotebooksClient.createCluster( - billingProjectId, clusterName, workspaceFirecloudName); - } + @Override + public ResponseEntity createCluster(String workspaceNamespace) { + String firecloudWorkspaceName = lookupWorkspace(workspaceNamespace).getFirecloudName(); + workspaceService.enforceWorkspaceAccessLevel( + workspaceNamespace, firecloudWorkspaceName, WorkspaceAccessLevel.WRITER); + workspaceService.validateActiveBilling(workspaceNamespace, firecloudWorkspaceName); - int retries = Optional.ofNullable(user.getClusterCreateRetries()).orElse(0); - if (org.pmiops.workbench.notebooks.model.ClusterStatus.ERROR.equals(fcCluster.getStatus())) { - if (retries <= 2) { - this.userService.setClusterRetryCount(retries + 1); - log.warning("Cluster has errored with logs: "); - if (fcCluster.getErrors() != null) { - for (ClusterError e : fcCluster.getErrors()) { - log.warning(e.getErrorMessage()); - } - } - log.warning("Retrying cluster creation."); - - this.leonardoNotebooksClient.deleteCluster(billingProjectId, clusterName); - } - } else if (org.pmiops.workbench.notebooks.model.ClusterStatus.RUNNING.equals( - fcCluster.getStatus()) - && retries != 0) { - this.userService.setClusterRetryCount(0); - } - DefaultClusterResponse resp = new DefaultClusterResponse(); - resp.setDefaultCluster(TO_ALL_OF_US_CLUSTER.apply(fcCluster)); - return ResponseEntity.ok(resp); + org.pmiops.workbench.notebooks.model.Cluster firecloudCluster = + leonardoNotebooksClient.createCluster( + workspaceNamespace, clusterNameForUser(userProvider.get()), firecloudWorkspaceName); + + return ResponseEntity.ok(TO_ALL_OF_US_CLUSTER.apply(firecloudCluster)); } @Override - public ResponseEntity deleteCluster(String projectName, String clusterName) { - this.userService.setClusterRetryCount(0); - this.leonardoNotebooksClient.deleteCluster(projectName, clusterName); + public ResponseEntity deleteCluster(String workspaceNamespace) { + String firecloudWorkspaceName = lookupWorkspace(workspaceNamespace).getFirecloudName(); + workspaceService.enforceWorkspaceAccessLevel( + workspaceNamespace, firecloudWorkspaceName, WorkspaceAccessLevel.WRITER); + + leonardoNotebooksClient.deleteCluster( + workspaceNamespace, clusterNameForUser(userProvider.get())); return ResponseEntity.ok(new EmptyResponse()); } @Override public ResponseEntity localize( - String projectName, String clusterName, ClusterLocalizeRequest body) { + String workspaceNamespace, ClusterLocalizeRequest body) { + DbWorkspace dbWorkspace = lookupWorkspace(workspaceNamespace); workspaceService.enforceWorkspaceAccessLevel( - body.getWorkspaceNamespace(), body.getWorkspaceId(), WorkspaceAccessLevel.READER); - workspaceService.validateActiveBilling(body.getWorkspaceNamespace(), body.getWorkspaceId()); + dbWorkspace.getWorkspaceNamespace(), + dbWorkspace.getFirecloudName(), + WorkspaceAccessLevel.WRITER); + workspaceService.validateActiveBilling( + dbWorkspace.getWorkspaceNamespace(), dbWorkspace.getFirecloudName()); - FirecloudWorkspace fcWorkspace; + final FirecloudWorkspace firecloudWorkspace; try { - fcWorkspace = + firecloudWorkspace = fireCloudService - .getWorkspace(body.getWorkspaceNamespace(), body.getWorkspaceId()) + .getWorkspace(dbWorkspace.getWorkspaceNamespace(), dbWorkspace.getFirecloudName()) .getWorkspace(); } catch (NotFoundException e) { throw new NotFoundException( String.format( "workspace %s/%s not found or not accessible", - body.getWorkspaceNamespace(), body.getWorkspaceId())); + dbWorkspace.getWorkspaceNamespace(), dbWorkspace.getFirecloudName())); } - DbCdrVersion cdrVersion = - workspaceService - .getRequired(body.getWorkspaceNamespace(), body.getWorkspaceId()) - .getCdrVersion(); + DbCdrVersion cdrVersion = dbWorkspace.getCdrVersion(); // For the common case where the notebook cluster matches the workspace // namespace, simply name the directory as the workspace ID; else we // include the namespace in the directory name to avoid possible conflicts // in workspace IDs. - String gcsNotebooksDir = "gs://" + fcWorkspace.getBucketName() + "/notebooks"; - Timestamp now = new Timestamp(clock.instant().toEpochMilli()); - long workspaceId = - workspaceService - .getRequired(body.getWorkspaceNamespace(), body.getWorkspaceId()) - .getWorkspaceId(); + String gcsNotebooksDir = "gs://" + firecloudWorkspace.getBucketName() + "/notebooks"; + long workspaceId = dbWorkspace.getWorkspaceId(); body.getNotebookNames() .forEach( - notebook -> + notebookName -> userRecentResourceService.updateNotebookEntry( - workspaceId, userProvider.get().getUserId(), gcsNotebooksDir + "/" + notebook)); - String workspacePath = body.getWorkspaceId(); - if (!projectName.equals(body.getWorkspaceNamespace())) { - workspacePath = - body.getWorkspaceNamespace() - + FireCloudService.WORKSPACE_DELIMITER - + body.getWorkspaceId(); - } + workspaceId, + userProvider.get().getUserId(), + gcsNotebooksDir + "/" + notebookName)); + String workspacePath = dbWorkspace.getFirecloudName(); String editDir = "workspaces/" + workspacePath; String playgroundDir = "workspaces_playground/" + workspacePath; String targetDir = body.getPlaygroundMode() ? playgroundDir : editDir; leonardoNotebooksClient.createStorageLink( - projectName, - clusterName, + workspaceNamespace, + clusterNameForUser(userProvider.get()), new StorageLink() .cloudStorageDirectory(gcsNotebooksDir) .localBaseDirectory(editDir) @@ -307,7 +288,7 @@ public ResponseEntity localize( // The Welder extension offers direct links to/from playground mode; write the AoU config file // to both locations so notebooks will work in either directory. - String aouConfigUri = aouConfigDataUri(fcWorkspace, cdrVersion, projectName); + String aouConfigUri = aouConfigDataUri(firecloudWorkspace, cdrVersion, workspaceNamespace); localizeMap.put(editDir + "/" + AOU_CONFIG_FILENAME, aouConfigUri); localizeMap.put(playgroundDir + "/" + AOU_CONFIG_FILENAME, aouConfigUri); @@ -319,8 +300,9 @@ public ResponseEntity localize( Collectors.toMap( name -> targetDir + "/" + name, name -> gcsNotebooksDir + "/" + name))); } - - leonardoNotebooksClient.localize(projectName, clusterName, localizeMap); + log.info(localizeMap.toString()); + leonardoNotebooksClient.localize( + workspaceNamespace, clusterNameForUser(userProvider.get()), localizeMap); // This is the Jupyer-server-root-relative path, the style used by the Jupyter REST API. return ResponseEntity.ok(new ClusterLocalizeResponse().clusterLocalDirectory(targetDir)); diff --git a/api/src/main/java/org/pmiops/workbench/notebooks/LeonardoNotebooksClientImpl.java b/api/src/main/java/org/pmiops/workbench/notebooks/LeonardoNotebooksClientImpl.java index aed923d362b..ae9165f568e 100644 --- a/api/src/main/java/org/pmiops/workbench/notebooks/LeonardoNotebooksClientImpl.java +++ b/api/src/main/java/org/pmiops/workbench/notebooks/LeonardoNotebooksClientImpl.java @@ -15,6 +15,7 @@ import org.pmiops.workbench.db.model.DbUser.ClusterConfig; import org.pmiops.workbench.db.model.DbWorkspace; import org.pmiops.workbench.db.model.DbWorkspace.BillingMigrationStatus; +import org.pmiops.workbench.exceptions.ExceptionUtils; import org.pmiops.workbench.notebooks.api.ClusterApi; import org.pmiops.workbench.notebooks.api.NotebooksApi; import org.pmiops.workbench.notebooks.api.StatusApi; @@ -171,7 +172,12 @@ public void deleteCluster(String googleProject, String clusterName) { @Override public Cluster getCluster(String googleProject, String clusterName) { ClusterApi clusterApi = clusterApiProvider.get(); - return retryHandler.run((context) -> clusterApi.getCluster(googleProject, clusterName)); + try { + return retryHandler.runAndThrowChecked( + (context) -> clusterApi.getCluster(googleProject, clusterName)); + } catch (ApiException e) { + throw ExceptionUtils.convertNotebookException(e); + } } @Override diff --git a/api/src/main/java/org/pmiops/workbench/workspaces/WorkspaceService.java b/api/src/main/java/org/pmiops/workbench/workspaces/WorkspaceService.java index a61bcffef15..6a3955ae35e 100644 --- a/api/src/main/java/org/pmiops/workbench/workspaces/WorkspaceService.java +++ b/api/src/main/java/org/pmiops/workbench/workspaces/WorkspaceService.java @@ -8,6 +8,7 @@ import org.pmiops.workbench.db.model.DbUserRecentWorkspace; import org.pmiops.workbench.db.model.DbWorkspace; import org.pmiops.workbench.exceptions.ForbiddenException; +import org.pmiops.workbench.exceptions.NotFoundException; import org.pmiops.workbench.firecloud.FireCloudService; import org.pmiops.workbench.firecloud.model.FirecloudWorkspaceACLUpdate; import org.pmiops.workbench.firecloud.model.FirecloudWorkspaceAccessEntry; @@ -33,6 +34,8 @@ public interface WorkspaceService { List getWorkspacesAndPublicWorkspaces(); + WorkspaceResponse getWorkspace(String workspaceNamespace) throws NotFoundException; + WorkspaceResponse getWorkspace(String workspaceNamespace, String workspaceId); List getWorkspaces(); diff --git a/api/src/main/java/org/pmiops/workbench/workspaces/WorkspaceServiceImpl.java b/api/src/main/java/org/pmiops/workbench/workspaces/WorkspaceServiceImpl.java index 92ccda3d6f5..f7942f229b0 100644 --- a/api/src/main/java/org/pmiops/workbench/workspaces/WorkspaceServiceImpl.java +++ b/api/src/main/java/org/pmiops/workbench/workspaces/WorkspaceServiceImpl.java @@ -204,18 +204,32 @@ public List getWorkspacesAndPublicWorkspaces() { .collect(Collectors.toList()); } + @Transactional + @Override + public WorkspaceResponse getWorkspace(String workspaceNamespace) throws NotFoundException { + DbWorkspace dbWorkspace = + getByNamespace(workspaceNamespace) + .orElseThrow(() -> new NotFoundException("Workspace not found: " + workspaceNamespace)); + return getWorkspaceImpl(dbWorkspace); + } + @Transactional @Override public WorkspaceResponse getWorkspace(String workspaceNamespace, String workspaceId) { DbWorkspace dbWorkspace = getRequired(workspaceNamespace, workspaceId); + return getWorkspaceImpl(dbWorkspace); + } + private WorkspaceResponse getWorkspaceImpl(DbWorkspace dbWorkspace) { FirecloudWorkspaceResponse fcResponse; FirecloudWorkspace fcWorkspace; WorkspaceResponse workspaceResponse = new WorkspaceResponse(); // This enforces access controls. - fcResponse = fireCloudService.getWorkspace(workspaceNamespace, workspaceId); + fcResponse = + fireCloudService.getWorkspace( + dbWorkspace.getWorkspaceNamespace(), dbWorkspace.getFirecloudName()); fcWorkspace = fcResponse.getWorkspace(); if (fcResponse.getAccessLevel().equals(WorkspaceService.PROJECT_OWNER_ACCESS_LEVEL)) { diff --git a/api/src/main/resources/workbench.yaml b/api/src/main/resources/workbench.yaml index 5920f95ee54..0779a8b14d2 100644 --- a/api/src/main/resources/workbench.yaml +++ b/api/src/main/resources/workbench.yaml @@ -71,6 +71,21 @@ parameters: format: int64 required: true description: Data set ID + workspaceNamespace: + in: path + name: workspaceNamespace + type: string + required: true + description: The workspace namespace, aka Google Cloud project ID. + workspaceId: + in: path + name: workspaceId + type: string + required: true + description: > + The workspace ID, aka the Firecloud workspace ID. This is actually an immutable string, + which in AoU looks like a lower-cased concatenation of the workspace name when it was first + created. paths: /v1/status: @@ -583,88 +598,77 @@ paths: schema: $ref: '#/definitions/ErrorResponse' - /v1/clusters/{billingProjectId}/{workspaceFirecloudName}: + /v1/clusters/{workspaceNamespace}: get: - summary: List available notebook clusters + summary: Get the user's workspace cluster. description: > - Returns the clusters available to the current user in the given billing project. - Currently there is a single default cluster supported per billing project - and this cluster should always either exist or be in the process of being - initialized. In a future where researchers have more control over cluster - creation, this endpoint would be extended to return all clusters. - operationId: listClusters + Returns the current user's cluster, if any, which has been created for the given workspace. + operationId: getCluster tags: - cluster parameters: - - in: path - name: billingProjectId - description: The unique identifier of the Google Billing Project containing the clusters - required: true - type: string - - in: path - name: workspaceFirecloudName - description: The firecloudName of the workspace whose notebook we're looking at - required: true - type: string + - $ref: '#/parameters/workspaceNamespace' responses: 200: - description: The users cluster + description: The cluster for this user and workspace. schema: - $ref: '#/definitions/DefaultClusterResponse' - 500: - description: Internal Error + $ref: '#/definitions/Cluster' + 404: + description: No cluster exists for this user and workspace. schema: $ref: '#/definitions/ErrorResponse' - - /v1/clusters/{clusterNamespace}/{clusterName}: + post: + summary: Create a workspace cluster. + description: > + Creates a new cluster for the current user in the given billing project. If a cluster already + exists for the user in this billing project, a 409 conflict error is returned (even if the cluster + is still initializing or is not in a ready state). + operationId: createCluster + tags: + - cluster + parameters: + # TODO(RW-3697): Custom cluster creation params should be added as a body param here. + - $ref: '#/parameters/workspaceNamespace' + responses: + 200: + description: Returns the cluster that was created for this user and workspace. + schema: + $ref: '#/definitions/Cluster' + 409: + description: A cluster for this user and workspace already exists. + schema: + $ref: "#/definitions/EmptyResponse" + # TODO(RW-3695): updateCluster should be added as a PATCH endpoint here. delete: - summary: Delete a cluster by name. + summary: Delete a workspace cluster. operationId: deleteCluster tags: - cluster parameters: - - in: path - name: clusterNamespace - description: clusterNamespace - required: true - type: string - - in: path - name: clusterName - description: clusterName - required: true - type: string + - $ref: '#/parameters/workspaceNamespace' responses: 200: - description: Deletion success + description: Success schema: $ref: '#/definitions/EmptyResponse' - 500: - description: Internal Error + 404: + description: No cluster exists for this user and workspace. schema: - $ref: '#/definitions/ErrorResponse' + $ref: "#/definitions/ErrorResponse" - /v1/clusters/{clusterNamespace}/{clusterName}/localize: + /v1/clusters/{workspaceNamespace}/localize: post: - summary: > - Localize files from a workspace to notebook cluster. As a side-effect, + summary: Localize files to the user's cluster. + description: > + Localizes files to the cluster for the current user and given workspace. As a side-effect, JSON workspace environment files will also be localized to the cluster. - description: Localize notebook files to the corresponding notebook cluster. operationId: localize tags: - cluster consumes: - application/json parameters: - - in: path - name: clusterNamespace - description: clusterNamespace - required: true - type: string - - in: path - name: clusterName - description: clusterName - required: true - type: string + - $ref: '#/parameters/workspaceNamespace' - in: body name: body description: Localization request. @@ -4068,12 +4072,20 @@ definitions: format: int64 description: Milliseconds since the UNIX epoch. - DefaultClusterResponse: + GetClusterResponse: + type: object + properties: + cluster: + description: The cluster associated with a given user and workspace. May be empty. + $ref: "#/definitions/Cluster" + + CreateClusterResponse: type: object required: - - defaultCluster + - cluster properties: - defaultCluster: + cluster: + description: The cluster that was created. $ref: "#/definitions/Cluster" ListClusterResponse: @@ -4117,17 +4129,9 @@ definitions: ClusterLocalizeRequest: type: object required: - - workspaceNamespace - - workspaceId - notebookNames - playgroundMode properties: - workspaceNamespace: - type: string - description: Workspace namespace from which to source notebooks - workspaceId: - type: string - description: Workspace from which to source notebooks notebookNames: type: array description: > diff --git a/api/src/test/java/org/pmiops/workbench/api/ClusterControllerTest.java b/api/src/test/java/org/pmiops/workbench/api/ClusterControllerTest.java index 5b8ac8efef0..bac7881b78d 100644 --- a/api/src/test/java/org/pmiops/workbench/api/ClusterControllerTest.java +++ b/api/src/test/java/org/pmiops/workbench/api/ClusterControllerTest.java @@ -20,6 +20,7 @@ import java.util.Base64; import java.util.List; import java.util.Map; +import java.util.Optional; import java.util.Random; import java.util.stream.Collectors; import org.json.JSONObject; @@ -32,7 +33,6 @@ import org.pmiops.workbench.actionaudit.auditors.UserServiceAuditor; import org.pmiops.workbench.compliance.ComplianceService; import org.pmiops.workbench.config.WorkbenchConfig; -import org.pmiops.workbench.config.WorkbenchConfig.FeatureFlagsConfig; import org.pmiops.workbench.db.dao.AdminActionHistoryDao; import org.pmiops.workbench.db.dao.UserDao; import org.pmiops.workbench.db.dao.UserRecentResourceService; @@ -40,7 +40,6 @@ import org.pmiops.workbench.db.model.DbCdrVersion; import org.pmiops.workbench.db.model.DbUser; import org.pmiops.workbench.db.model.DbWorkspace; -import org.pmiops.workbench.exceptions.BadRequestException; import org.pmiops.workbench.exceptions.ForbiddenException; import org.pmiops.workbench.exceptions.NotFoundException; import org.pmiops.workbench.firecloud.FireCloudService; @@ -63,6 +62,8 @@ import org.pmiops.workbench.notebooks.model.ListClusterResponse; import org.pmiops.workbench.test.FakeClock; import org.pmiops.workbench.test.FakeLongRandom; +import org.pmiops.workbench.utils.WorkspaceMapper; +import org.pmiops.workbench.utils.WorkspaceMapperImpl; import org.pmiops.workbench.workspaces.WorkspaceService; import org.springframework.beans.factory.annotation.Autowired; import org.springframework.boot.test.autoconfigure.orm.jpa.DataJpaTest; @@ -84,12 +85,17 @@ @Transactional(propagation = Propagation.NOT_SUPPORTED) public class ClusterControllerTest { - private static final String BILLING_PROJECT_ID = "proj"; - private static final String BILLING_PROJECT_ID_2 = "proj2"; + private static final String BILLING_PROJECT_ID = "aou-rw-1234"; + private static final String BILLING_PROJECT_ID_2 = "aou-rw-5678"; // a workspace's namespace is always its billing project ID private static final String WORKSPACE_NS = BILLING_PROJECT_ID; - private static final String WORKSPACE_ID = "wsid"; - private static final String WORKSPACE_NAME = "wsn"; + // Workspace ID is also known as firecloud_name. This identifier is generated by + // Firecloud, based on the name of the workspace upon first creation. Firecloud + // tends to remove whitespace and punctuation, lowercase everything, and concatenate + // it together. Note that when a workspace name changes, the Firecloud name stays + // the same. + private static final String WORKSPACE_ID = "myfirstworkspace"; + private static final String WORKSPACE_NAME = "My First Workspace"; private static final String LOGGED_IN_USER_EMAIL = "bob@gmail.com"; private static final String OTHER_USER_EMAIL = "alice@gmail.com"; private static final String BUCKET_NAME = "workspace-bucket"; @@ -110,19 +116,9 @@ public class ClusterControllerTest { UserServiceImpl.class, InstitutionServiceImpl.class, InstitutionMapperImpl.class, + WorkspaceMapperImpl.class, PublicInstitutionDetailsMapperImpl.class }) - @MockBean({ - ClusterAuditor.class, - FireCloudService.class, - LeonardoNotebooksClient.class, - WorkspaceService.class, - UserRecentResourceService.class, - ComplianceService.class, - DirectoryService.class, - AdminActionHistoryDao.class, - UserServiceAuditor.class - }) static class Configuration { @Bean @@ -150,14 +146,19 @@ Random random() { @Captor private ArgumentCaptor> mapCaptor; - @Autowired ClusterAuditor clusterAuditor; - @Autowired LeonardoNotebooksClient notebookService; - @Autowired FireCloudService fireCloudService; + @MockBean AdminActionHistoryDao mockAdminActionHistoryDao; + @MockBean ClusterAuditor mockClusterAuditor; + @MockBean ComplianceService mockComplianceService; + @MockBean DirectoryService mockDirectoryService; + @MockBean FireCloudService mockFireCloudService; + @MockBean LeonardoNotebooksClient mockLeoNotebooksClient; + @MockBean UserRecentResourceService mockUserRecentResourceService; + @MockBean UserServiceAuditor mockUserServiceAuditor; + @MockBean WorkspaceService mockWorkspaceService; + @Autowired UserDao userDao; - @Autowired WorkspaceService workspaceService; + @Autowired WorkspaceMapper workspaceMapper; @Autowired ClusterController clusterController; - @Autowired UserRecentResourceService userRecentResourceService; - @Autowired Clock clock; private DbCdrVersion cdrVersion; private org.pmiops.workbench.notebooks.model.Cluster testFcCluster; @@ -173,16 +174,12 @@ Random random() { @Before public void setUp() { - config = new WorkbenchConfig(); - config.server = new WorkbenchConfig.ServerConfig(); + config = WorkbenchConfig.createEmptyConfig(); config.server.apiBaseUrl = API_BASE_URL; - config.firecloud = new WorkbenchConfig.FireCloudConfig(); config.firecloud.registeredDomainName = ""; config.firecloud.clusterDefaultMachineType = "n1-standard-4"; config.firecloud.clusterDefaultDiskSizeGb = 50; - config.access = new WorkbenchConfig.AccessConfig(); config.access.enableComplianceTraining = true; - config.featureFlags = new FeatureFlagsConfig(); user = new DbUser(); user.setUsername(LOGGED_IN_USER_EMAIL); @@ -251,7 +248,8 @@ public void setUp() { testWorkspace.setName(WORKSPACE_NAME); testWorkspace.setFirecloudName(WORKSPACE_ID); testWorkspace.setCdrVersion(cdrVersion); - doReturn(testWorkspace).when(workspaceService).get(WORKSPACE_NS, WORKSPACE_ID); + doReturn(testWorkspace).when(mockWorkspaceService).get(WORKSPACE_NS, WORKSPACE_ID); + doReturn(Optional.of(testWorkspace)).when(mockWorkspaceService).getByNamespace(WORKSPACE_NS); } private FirecloudWorkspace createFcWorkspace(String ns, String name, String creator) { @@ -262,20 +260,21 @@ private FirecloudWorkspace createFcWorkspace(String ns, String name, String crea .bucketName(BUCKET_NAME); } - private void stubGetWorkspace(String ns, String name, String creator) { + private void stubGetWorkspace(String workspaceNamespace, String firecloudName, String creator) { DbWorkspace w = new DbWorkspace(); - w.setWorkspaceNamespace(ns); - w.setFirecloudName(name); + w.setWorkspaceNamespace(workspaceNamespace); + w.setFirecloudName(firecloudName); w.setCdrVersion(cdrVersion); - when(workspaceService.getRequired(ns, name)).thenReturn(w); - stubGetFcWorkspace(createFcWorkspace(ns, name, creator)); + when(mockWorkspaceService.getRequired(workspaceNamespace, firecloudName)).thenReturn(w); + when(mockWorkspaceService.getByNamespace(workspaceNamespace)).thenReturn(Optional.of(w)); + stubGetFcWorkspace(createFcWorkspace(workspaceNamespace, firecloudName, creator)); } private void stubGetFcWorkspace(FirecloudWorkspace fcWorkspace) { FirecloudWorkspaceResponse fcResponse = new FirecloudWorkspaceResponse(); fcResponse.setWorkspace(fcWorkspace); fcResponse.setAccessLevel(WorkspaceAccessLevel.OWNER.toString()); - when(fireCloudService.getWorkspace(fcWorkspace.getNamespace(), fcWorkspace.getName())) + when(mockFireCloudService.getWorkspace(fcWorkspace.getNamespace(), fcWorkspace.getName())) .thenReturn(fcResponse); } @@ -290,31 +289,40 @@ private String getClusterName() { } @Test - public void testListClusters() { - when(notebookService.getCluster(BILLING_PROJECT_ID, getClusterName())) + public void testGetCluster() { + when(mockLeoNotebooksClient.getCluster(BILLING_PROJECT_ID, getClusterName())) .thenReturn(testFcCluster); - assertThat( - clusterController - .listClusters(BILLING_PROJECT_ID, WORKSPACE_NAME) - .getBody() - .getDefaultCluster()) - .isEqualTo(testCluster); + assertThat(clusterController.getCluster(BILLING_PROJECT_ID).getBody()).isEqualTo(testCluster); + } + + @Test + public void testGetCluster_UnknownStatus() { + when(mockLeoNotebooksClient.getCluster(BILLING_PROJECT_ID, getClusterName())) + .thenReturn(testFcCluster.status(null)); + + assertThat(clusterController.getCluster(BILLING_PROJECT_ID).getBody().getStatus()) + .isEqualTo(ClusterStatus.UNKNOWN); + } + + @Test(expected = NotFoundException.class) + public void testGetCluster_NullBillingProject() { + clusterController.getCluster(null); } @Test public void testDeleteClustersInProject() { List listClusterResponseList = ImmutableList.of(testFcClusterListResponse); - when(notebookService.listClustersByProjectAsAdmin(BILLING_PROJECT_ID)) + when(mockLeoNotebooksClient.listClustersByProjectAsAdmin(BILLING_PROJECT_ID)) .thenReturn(listClusterResponseList); clusterController.deleteClustersInProject( BILLING_PROJECT_ID, new ListClusterDeleteRequest() .clustersToDelete(ImmutableList.of(testFcCluster.getClusterName()))); - verify(notebookService) + verify(mockLeoNotebooksClient) .deleteClusterAsAdmin(BILLING_PROJECT_ID, testFcCluster.getClusterName()); - verify(clusterAuditor) + verify(mockClusterAuditor) .fireDeleteClustersInProject( BILLING_PROJECT_ID, listClusterResponseList.stream() @@ -323,49 +331,49 @@ public void testDeleteClustersInProject() { } @Test - public void testDeleteClustersInProjectDeleteSome() { + public void testDeleteClustersInProject_DeleteSome() { List listClusterResponseList = ImmutableList.of(testFcClusterListResponse, testFcClusterListResponse2); List clustersToDelete = ImmutableList.of(testFcCluster.getClusterName()); - when(notebookService.listClustersByProjectAsAdmin(BILLING_PROJECT_ID)) + when(mockLeoNotebooksClient.listClustersByProjectAsAdmin(BILLING_PROJECT_ID)) .thenReturn(listClusterResponseList); clusterController.deleteClustersInProject( BILLING_PROJECT_ID, new ListClusterDeleteRequest().clustersToDelete(clustersToDelete)); - verify(notebookService, times(clustersToDelete.size())) + verify(mockLeoNotebooksClient, times(clustersToDelete.size())) .deleteClusterAsAdmin(BILLING_PROJECT_ID, testFcCluster.getClusterName()); - verify(clusterAuditor, times(1)) + verify(mockClusterAuditor, times(1)) .fireDeleteClustersInProject(BILLING_PROJECT_ID, clustersToDelete); } @Test - public void testDeleteClustersInProjectDeleteDoesNotAffectOtherProjects() { + public void testDeleteClustersInProject_DeleteDoesNotAffectOtherProjects() { List listClusterResponseList = ImmutableList.of(testFcClusterListResponse, testFcClusterListResponse2); List clustersToDelete = ImmutableList.of(testFcClusterDifferentProject.getClusterName()); - when(notebookService.listClustersByProjectAsAdmin(BILLING_PROJECT_ID)) + when(mockLeoNotebooksClient.listClustersByProjectAsAdmin(BILLING_PROJECT_ID)) .thenReturn(listClusterResponseList); clusterController.deleteClustersInProject( BILLING_PROJECT_ID, new ListClusterDeleteRequest().clustersToDelete(clustersToDelete)); - verify(notebookService, times(0)) + verify(mockLeoNotebooksClient, times(0)) .deleteClusterAsAdmin(BILLING_PROJECT_ID, testFcCluster.getClusterName()); - verify(clusterAuditor, times(0)) + verify(mockClusterAuditor, times(0)) .fireDeleteClustersInProject(BILLING_PROJECT_ID, clustersToDelete); } @Test - public void testDeleteClustersInProjectNoClusters() { + public void testDeleteClustersInProject_NoClusters() { List listClusterResponseList = ImmutableList.of(testFcClusterListResponse); - when(notebookService.listClustersByProjectAsAdmin(BILLING_PROJECT_ID)) + when(mockLeoNotebooksClient.listClustersByProjectAsAdmin(BILLING_PROJECT_ID)) .thenReturn(listClusterResponseList); clusterController.deleteClustersInProject( BILLING_PROJECT_ID, new ListClusterDeleteRequest().clustersToDelete(ImmutableList.of())); - verify(notebookService, never()) + verify(mockLeoNotebooksClient, never()) .deleteClusterAsAdmin(BILLING_PROJECT_ID, testFcCluster.getClusterName()); - verify(clusterAuditor, never()) + verify(mockClusterAuditor, never()) .fireDeleteClustersInProject( BILLING_PROJECT_ID, listClusterResponseList.stream() @@ -374,16 +382,16 @@ public void testDeleteClustersInProjectNoClusters() { } @Test - public void testDeleteClustersInProjectNullClustersList() { + public void testDeleteClustersInProject_NullClustersList() { List listClusterResponseList = ImmutableList.of(testFcClusterListResponse); - when(notebookService.listClustersByProjectAsAdmin(BILLING_PROJECT_ID)) + when(mockLeoNotebooksClient.listClustersByProjectAsAdmin(BILLING_PROJECT_ID)) .thenReturn(listClusterResponseList); clusterController.deleteClustersInProject( BILLING_PROJECT_ID, new ListClusterDeleteRequest().clustersToDelete(null)); - verify(notebookService) + verify(mockLeoNotebooksClient) .deleteClusterAsAdmin(BILLING_PROJECT_ID, testFcCluster.getClusterName()); - verify(clusterAuditor) + verify(mockClusterAuditor) .fireDeleteClustersInProject( BILLING_PROJECT_ID, listClusterResponseList.stream() @@ -392,45 +400,22 @@ public void testDeleteClustersInProjectNullClustersList() { } @Test - public void testListClustersUnknownStatus() { - when(notebookService.getCluster(BILLING_PROJECT_ID, getClusterName())) - .thenReturn(testFcCluster.status(null)); - - assertThat( - clusterController - .listClusters(BILLING_PROJECT_ID, WORKSPACE_NAME) - .getBody() - .getDefaultCluster() - .getStatus()) - .isEqualTo(ClusterStatus.UNKNOWN); - } - - @Test(expected = BadRequestException.class) - public void testListClustersNullBillingProject() { - clusterController.listClusters(null, WORKSPACE_NAME); - } - - @Test - public void testListClustersLazyCreate() { - when(notebookService.getCluster(BILLING_PROJECT_ID, getClusterName())) + public void testCreateCluster() { + when(mockLeoNotebooksClient.getCluster(BILLING_PROJECT_ID, getClusterName())) .thenThrow(new NotFoundException()); - when(notebookService.createCluster( - eq(BILLING_PROJECT_ID), eq(getClusterName()), eq(WORKSPACE_NAME))) + when(mockLeoNotebooksClient.createCluster( + eq(BILLING_PROJECT_ID), eq(getClusterName()), eq(WORKSPACE_ID))) .thenReturn(testFcCluster); - stubGetWorkspace(WORKSPACE_NS, WORKSPACE_NAME, "test"); + stubGetWorkspace(WORKSPACE_NS, WORKSPACE_ID, "test"); - assertThat( - clusterController - .listClusters(BILLING_PROJECT_ID, WORKSPACE_NAME) - .getBody() - .getDefaultCluster()) + assertThat(clusterController.createCluster(BILLING_PROJECT_ID).getBody()) .isEqualTo(testCluster); } @Test public void testDeleteCluster() { - clusterController.deleteCluster(BILLING_PROJECT_ID, "cluster"); - verify(notebookService).deleteCluster(BILLING_PROJECT_ID, "cluster"); + clusterController.deleteCluster(BILLING_PROJECT_ID); + verify(mockLeoNotebooksClient).deleteCluster(BILLING_PROJECT_ID, getClusterName()); } @Test @@ -477,29 +462,29 @@ public void testUpdateClusterConfigUserNotFound() { public void testLocalize() { ClusterLocalizeRequest req = new ClusterLocalizeRequest() - .workspaceNamespace(WORKSPACE_NS) - .workspaceId(WORKSPACE_ID) .notebookNames(ImmutableList.of("foo.ipynb")) .playgroundMode(false); stubGetWorkspace(WORKSPACE_NS, WORKSPACE_ID, LOGGED_IN_USER_EMAIL); - ClusterLocalizeResponse resp = - clusterController.localize(BILLING_PROJECT_ID, "cluster", req).getBody(); - assertThat(resp.getClusterLocalDirectory()).isEqualTo("workspaces/wsid"); + ClusterLocalizeResponse resp = clusterController.localize(BILLING_PROJECT_ID, req).getBody(); + assertThat(resp.getClusterLocalDirectory()).isEqualTo("workspaces/myfirstworkspace"); - verify(notebookService).localize(eq(BILLING_PROJECT_ID), eq("cluster"), mapCaptor.capture()); + verify(mockLeoNotebooksClient) + .localize(eq(BILLING_PROJECT_ID), eq(getClusterName()), mapCaptor.capture()); Map localizeMap = mapCaptor.getValue(); assertThat(localizeMap.keySet()) .containsExactly( - "workspaces/wsid/foo.ipynb", - "workspaces_playground/wsid/.all_of_us_config.json", - "workspaces/wsid/.all_of_us_config.json"); + "workspaces/myfirstworkspace/foo.ipynb", + "workspaces_playground/myfirstworkspace/.all_of_us_config.json", + "workspaces/myfirstworkspace/.all_of_us_config.json"); assertThat(localizeMap) - .containsEntry("workspaces/wsid/foo.ipynb", "gs://workspace-bucket/notebooks/foo.ipynb"); - JSONObject aouJson = dataUriToJson(localizeMap.get("workspaces/wsid/.all_of_us_config.json")); + .containsEntry( + "workspaces/myfirstworkspace/foo.ipynb", "gs://workspace-bucket/notebooks/foo.ipynb"); + JSONObject aouJson = + dataUriToJson(localizeMap.get("workspaces/myfirstworkspace/.all_of_us_config.json")); assertThat(aouJson.getString("WORKSPACE_ID")).isEqualTo(WORKSPACE_ID); assertThat(aouJson.getString("BILLING_CLOUD_PROJECT")).isEqualTo(BILLING_PROJECT_ID); assertThat(aouJson.getString("API_HOST")).isEqualTo(API_HOST); - verify(userRecentResourceService, times(1)) + verify(mockUserRecentResourceService, times(1)) .updateNotebookEntry(anyLong(), anyLong(), anyString()); } @@ -507,126 +492,109 @@ public void testLocalize() { public void testLocalize_playgroundMode() { ClusterLocalizeRequest req = new ClusterLocalizeRequest() - .workspaceNamespace(WORKSPACE_NS) - .workspaceId(WORKSPACE_ID) .notebookNames(ImmutableList.of("foo.ipynb")) .playgroundMode(true); stubGetWorkspace(WORKSPACE_NS, WORKSPACE_ID, LOGGED_IN_USER_EMAIL); - ClusterLocalizeResponse resp = - clusterController.localize(BILLING_PROJECT_ID, "cluster", req).getBody(); - assertThat(resp.getClusterLocalDirectory()).isEqualTo("workspaces_playground/wsid"); - verify(notebookService).localize(eq(BILLING_PROJECT_ID), eq("cluster"), mapCaptor.capture()); + ClusterLocalizeResponse resp = clusterController.localize(BILLING_PROJECT_ID, req).getBody(); + assertThat(resp.getClusterLocalDirectory()).isEqualTo("workspaces_playground/myfirstworkspace"); + verify(mockLeoNotebooksClient) + .localize(eq(BILLING_PROJECT_ID), eq(getClusterName()), mapCaptor.capture()); Map localizeMap = mapCaptor.getValue(); assertThat(localizeMap.keySet()) .containsExactly( - "workspaces_playground/wsid/foo.ipynb", - "workspaces_playground/wsid/.all_of_us_config.json", - "workspaces/wsid/.all_of_us_config.json"); + "workspaces_playground/myfirstworkspace/foo.ipynb", + "workspaces_playground/myfirstworkspace/.all_of_us_config.json", + "workspaces/myfirstworkspace/.all_of_us_config.json"); assertThat(localizeMap) .containsEntry( - "workspaces_playground/wsid/foo.ipynb", "gs://workspace-bucket/notebooks/foo.ipynb"); + "workspaces_playground/myfirstworkspace/foo.ipynb", + "gs://workspace-bucket/notebooks/foo.ipynb"); } @Test public void testLocalize_differentNamespace() { ClusterLocalizeRequest req = new ClusterLocalizeRequest() - .workspaceNamespace(WORKSPACE_NS) - .workspaceId(WORKSPACE_ID) .notebookNames(ImmutableList.of("foo.ipynb")) .playgroundMode(false); stubGetWorkspace(WORKSPACE_NS, WORKSPACE_ID, LOGGED_IN_USER_EMAIL); - ClusterLocalizeResponse resp = - clusterController.localize("other-proj", "cluster", req).getBody(); - verify(notebookService).localize(eq("other-proj"), eq("cluster"), mapCaptor.capture()); + stubGetWorkspace("other-proj", "myotherworkspace", LOGGED_IN_USER_EMAIL); + ClusterLocalizeResponse resp = clusterController.localize("other-proj", req).getBody(); + verify(mockLeoNotebooksClient) + .localize(eq("other-proj"), eq(getClusterName()), mapCaptor.capture()); Map localizeMap = mapCaptor.getValue(); assertThat(localizeMap.keySet()) .containsExactly( - "workspaces/proj__wsid/foo.ipynb", - "workspaces/proj__wsid/.all_of_us_config.json", - "workspaces_playground/proj__wsid/.all_of_us_config.json"); + "workspaces/myotherworkspace/foo.ipynb", + "workspaces/myotherworkspace/.all_of_us_config.json", + "workspaces_playground/myotherworkspace/.all_of_us_config.json"); assertThat(localizeMap) .containsEntry( - "workspaces/proj__wsid/foo.ipynb", "gs://workspace-bucket/notebooks/foo.ipynb"); - assertThat(resp.getClusterLocalDirectory()).isEqualTo("workspaces/proj__wsid"); + "workspaces/myotherworkspace/foo.ipynb", "gs://workspace-bucket/notebooks/foo.ipynb"); + assertThat(resp.getClusterLocalDirectory()).isEqualTo("workspaces/myotherworkspace"); JSONObject aouJson = - dataUriToJson(localizeMap.get("workspaces/proj__wsid/.all_of_us_config.json")); + dataUriToJson(localizeMap.get("workspaces/myotherworkspace/.all_of_us_config.json")); assertThat(aouJson.getString("BILLING_CLOUD_PROJECT")).isEqualTo("other-proj"); } @Test public void testLocalize_noNotebooks() { ClusterLocalizeRequest req = new ClusterLocalizeRequest(); - req.setWorkspaceNamespace(WORKSPACE_NS); - req.setWorkspaceId(WORKSPACE_ID); req.setPlaygroundMode(false); stubGetWorkspace(WORKSPACE_NS, WORKSPACE_ID, LOGGED_IN_USER_EMAIL); - ClusterLocalizeResponse resp = - clusterController.localize(BILLING_PROJECT_ID, "cluster", req).getBody(); - verify(notebookService).localize(eq(BILLING_PROJECT_ID), eq("cluster"), mapCaptor.capture()); + ClusterLocalizeResponse resp = clusterController.localize(BILLING_PROJECT_ID, req).getBody(); + verify(mockLeoNotebooksClient) + .localize(eq(BILLING_PROJECT_ID), eq(getClusterName()), mapCaptor.capture()); // Config files only. Map localizeMap = mapCaptor.getValue(); assertThat(localizeMap.keySet()) .containsExactly( - "workspaces_playground/wsid/.all_of_us_config.json", - "workspaces/wsid/.all_of_us_config.json"); - assertThat(resp.getClusterLocalDirectory()).isEqualTo("workspaces/wsid"); + "workspaces_playground/myfirstworkspace/.all_of_us_config.json", + "workspaces/myfirstworkspace/.all_of_us_config.json"); + assertThat(resp.getClusterLocalDirectory()).isEqualTo("workspaces/myfirstworkspace"); } @Test - public void listCluster_validateActiveBilling() { + public void GetCluster_validateActiveBilling() { doThrow(ForbiddenException.class) - .when(workspaceService) + .when(mockWorkspaceService) .validateActiveBilling(WORKSPACE_NS, WORKSPACE_ID); - assertThrows( - ForbiddenException.class, () -> clusterController.listClusters(WORKSPACE_NS, WORKSPACE_ID)); + assertThrows(ForbiddenException.class, () -> clusterController.getCluster(WORKSPACE_NS)); } @Test - public void listCluster_validateActiveBilling_checkAccessFirst() { - doThrow(ForbiddenException.class) - .when(workspaceService) - .validateActiveBilling(WORKSPACE_NS, WORKSPACE_ID); - + public void getCluster_validateActiveBilling_checkAccessFirst() { doThrow(ForbiddenException.class) - .when(workspaceService) - .enforceWorkspaceAccessLevel(WORKSPACE_NS, WORKSPACE_ID, WorkspaceAccessLevel.READER); + .when(mockWorkspaceService) + .enforceWorkspaceAccessLevel(WORKSPACE_NS, WORKSPACE_ID, WorkspaceAccessLevel.WRITER); - assertThrows( - ForbiddenException.class, () -> clusterController.listClusters(WORKSPACE_NS, WORKSPACE_ID)); - verify(workspaceService, never()).validateActiveBilling(anyString(), anyString()); + assertThrows(ForbiddenException.class, () -> clusterController.getCluster(WORKSPACE_NS)); + verify(mockWorkspaceService, never()).validateActiveBilling(anyString(), anyString()); } @Test public void localize_validateActiveBilling() { doThrow(ForbiddenException.class) - .when(workspaceService) + .when(mockWorkspaceService) .validateActiveBilling(WORKSPACE_NS, WORKSPACE_ID); - ClusterLocalizeRequest req = - new ClusterLocalizeRequest().workspaceNamespace(WORKSPACE_NS).workspaceId(WORKSPACE_ID); - - assertThrows(ForbiddenException.class, () -> clusterController.localize("y", "z", req)); + ClusterLocalizeRequest req = new ClusterLocalizeRequest(); + assertThrows(ForbiddenException.class, () -> clusterController.localize(WORKSPACE_NS, req)); } @Test public void localize_validateActiveBilling_checkAccessFirst() { doThrow(ForbiddenException.class) - .when(workspaceService) - .validateActiveBilling(WORKSPACE_NS, WORKSPACE_ID); - - doThrow(ForbiddenException.class) - .when(workspaceService) - .enforceWorkspaceAccessLevel(WORKSPACE_NS, WORKSPACE_ID, WorkspaceAccessLevel.READER); + .when(mockWorkspaceService) + .enforceWorkspaceAccessLevel(WORKSPACE_NS, WORKSPACE_ID, WorkspaceAccessLevel.WRITER); - ClusterLocalizeRequest req = - new ClusterLocalizeRequest().workspaceNamespace(WORKSPACE_NS).workspaceId(WORKSPACE_ID); + ClusterLocalizeRequest req = new ClusterLocalizeRequest(); - assertThrows(ForbiddenException.class, () -> clusterController.localize("y", "z", req)); - verify(workspaceService, never()).validateActiveBilling(anyString(), anyString()); + assertThrows(ForbiddenException.class, () -> clusterController.localize(WORKSPACE_NS, req)); + verify(mockWorkspaceService, never()).validateActiveBilling(anyString(), anyString()); } private void createUser(String email) { diff --git a/ui/src/app/pages/analysis/interactive-notebook.tsx b/ui/src/app/pages/analysis/interactive-notebook.tsx index a7048400b0e..320f61310cf 100644 --- a/ui/src/app/pages/analysis/interactive-notebook.tsx +++ b/ui/src/app/pages/analysis/interactive-notebook.tsx @@ -10,12 +10,11 @@ import {EditComponentReact} from 'app/icons/edit'; import {PlaygroundModeIcon} from 'app/icons/playground-mode-icon'; import {ConfirmPlaygroundModeModal} from 'app/pages/analysis/confirm-playground-mode-modal'; import {NotebookInUseModal} from 'app/pages/analysis/notebook-in-use-modal'; -import {notebooksClusterApi} from 'app/services/notebooks-swagger-fetch-clients'; -import {clusterApi, workspacesApi} from 'app/services/swagger-fetch-clients'; +import {workspacesApi} from 'app/services/swagger-fetch-clients'; import colors, {colorWithWhiteness} from 'app/styles/colors'; import {reactStyles, ReactWrapperBase, withCurrentWorkspace, withUrlParams} from 'app/utils'; import {AnalyticsTracker} from 'app/utils/analytics'; -import {isAbortError} from 'app/utils/errors'; +import {ClusterInitializer} from 'app/utils/cluster-initializer'; import {navigate, userProfileStore} from 'app/utils/navigation'; import {ACTION_DISABLED_INVALID_BILLING} from 'app/utils/strings'; import {WorkspaceData} from 'app/utils/workspace-data'; @@ -170,35 +169,13 @@ export const InteractiveNotebook = fp.flow(withUrlParams(), withCurrentWorkspace this.aborter.abort(); } - private runCluster(onClusterReady: Function): void { - const retry = () => { - this.runClusterTimer = setTimeout(() => this.runCluster(onClusterReady), 5000); - }; - - clusterApi().listClusters(this.props.urlParams.ns, this.props.urlParams.wsid, { - signal: this.aborter.signal - }).then((body) => { - const cluster = body.defaultCluster; - this.setState({clusterStatus: cluster.status}); - - if (cluster.status === ClusterStatus.Stopped) { - notebooksClusterApi() - .startCluster(cluster.clusterNamespace, cluster.clusterName); - } - - if (cluster.status === ClusterStatus.Running) { - onClusterReady(); - } else { - retry(); - } - }) - .catch((e: Error) => { - if (isAbortError(e)) { - return; - } - // TODO(RW-3097): Backoff, or don't retry forever. - retry(); - }); + private async runCluster(onClusterReady: Function): Promise { + await ClusterInitializer.initialize({ + workspaceNamespace: this.props.urlParams.ns, + onStatusUpdate: (status) => this.setState({clusterStatus: status}), + abortSignal: this.aborter.signal + }); + onClusterReady(); } private startEditMode() { diff --git a/ui/src/app/pages/analysis/notebook-redirect.tsx b/ui/src/app/pages/analysis/notebook-redirect.tsx index dacafc6d121..f815a39e0ef 100644 --- a/ui/src/app/pages/analysis/notebook-redirect.tsx +++ b/ui/src/app/pages/analysis/notebook-redirect.tsx @@ -3,7 +3,6 @@ import * as fp from 'lodash/fp'; import * as React from 'react'; import Iframe from 'react-iframe'; -import {isAbortError, reportError} from 'app/utils/errors'; import {urlParamsStore} from 'app/utils/navigation'; import {fetchAbortableRetry} from 'app/utils/retry'; @@ -13,7 +12,7 @@ import {Modal, ModalBody, ModalFooter, ModalTitle} from 'app/components/modals'; import {Spinner} from 'app/components/spinners'; import {NotebookIcon} from 'app/icons/notebook-icon'; import {ReminderIcon} from 'app/icons/reminder'; -import {jupyterApi, notebooksApi, notebooksClusterApi} from 'app/services/notebooks-swagger-fetch-clients'; +import {jupyterApi, notebooksApi} from 'app/services/notebooks-swagger-fetch-clients'; import {clusterApi} from 'app/services/swagger-fetch-clients'; import colors, {colorWithWhiteness} from 'app/styles/colors'; import { @@ -23,6 +22,7 @@ import { withQueryParams, withUserProfile } from 'app/utils'; +import {ClusterInitializer} from 'app/utils/cluster-initializer'; import {Kernels} from 'app/utils/notebook-kernels'; import {WorkspaceData} from 'app/utils/workspace-data'; import {environment} from 'environments/environment'; @@ -217,7 +217,6 @@ interface Props { profileState: {profile: Profile, reload: Function, updateCache: Function}; } -const clusterPollingTimeoutMillis = 15000; const clusterApiRetryTimeoutMillis = 10000; const clusterApiRetryAttempts = 5; const redirectMillis = 1000; @@ -239,10 +238,10 @@ export const NotebookRedirect = fp.flow(withUserProfile(), withCurrentWorkspace( }; } - private isClusterInProgress(cluster: Cluster): boolean { - return cluster.status === ClusterStatus.Starting || - cluster.status === ClusterStatus.Stopping || - cluster.status === ClusterStatus.Stopped; + private isClusterInProgress(status: ClusterStatus): boolean { + return status === ClusterStatus.Starting || + status === ClusterStatus.Stopping || + status === ClusterStatus.Stopped; } private isCreatingNewNotebook() { @@ -253,12 +252,6 @@ export const NotebookRedirect = fp.flow(withUserProfile(), withCurrentWorkspace( return this.props.queryParams.playgroundMode === 'true'; } - private async getDefaultCluster(billingProjectId) { - const resp = await this.clusterRetry(() => clusterApi().listClusters( - billingProjectId, this.props.workspace.id, {signal: this.aborter.signal})); - return resp.defaultCluster; - } - private async clusterRetry(f: () => Promise): Promise { return await fetchAbortableRetry(f, clusterApiRetryTimeoutMillis, clusterApiRetryAttempts); } @@ -308,54 +301,25 @@ export const NotebookRedirect = fp.flow(withUserProfile(), withCurrentWorkspace( this.aborter.abort(); } + onClusterStatusUpdate(status: ClusterStatus) { + if (this.isClusterInProgress(status)) { + this.incrementProgress(Progress.Resuming); + } else { + this.incrementProgress(Progress.Initializing); + } + } + // check the cluster's status: if it's Running we can connect the notebook to it // otherwise we need to start polling private async initializeClusterStatusChecking(billingProjectId) { this.incrementProgress(Progress.Unknown); - try { - const cluster = await this.getDefaultCluster(billingProjectId); - if (cluster.status === ClusterStatus.Running) { - await this.connectToRunningCluster(cluster); - } else { - if (this.isClusterInProgress(cluster)) { - this.incrementProgress(Progress.Resuming); - } else { - this.incrementProgress(Progress.Initializing); - } - - this.pollTimer = setTimeout(() => this.pollForRunningCluster(billingProjectId), clusterPollingTimeoutMillis); - } - } catch (error) { - if (!isAbortError(error)) { - reportError(error); - this.setState({showErrorModal: true}); - throw error; - } - } - } - private async pollForRunningCluster(billingProjectId) { - try { - const cluster = await this.getDefaultCluster(billingProjectId); - if (cluster.status === ClusterStatus.Running) { - await this.connectToRunningCluster(cluster); - } else { - // re-start cluster if stopped, and try again in the next polling interval - if (cluster.status === ClusterStatus.Stopped) { - await this.clusterRetry(() => notebooksClusterApi().startCluster( - cluster.clusterNamespace, cluster.clusterName, {signal: this.aborter.signal})); - } - - // TODO(RW-3097): Backoff, or don't retry forever. - this.pollTimer = setTimeout(() => this.pollForRunningCluster(billingProjectId), clusterPollingTimeoutMillis); - } - } catch (error) { - if (!isAbortError(error)) { - reportError(error); - this.setState({showErrorModal: true}); - throw error; - } - } + const cluster = await ClusterInitializer.initialize({ + workspaceNamespace: billingProjectId, + onStatusUpdate: (status) => this.onClusterStatusUpdate(status), + abortSignal: this.aborter.signal + }); + await this.connectToRunningCluster(cluster); } private async connectToRunningCluster(cluster) { @@ -416,8 +380,7 @@ export const NotebookRedirect = fp.flow(withUserProfile(), withCurrentWorkspace( private async localizeNotebooks(cluster: Cluster, notebookNames: Array) { const {workspace} = this.props; const resp = await this.clusterRetry(() => clusterApi().localize( - cluster.clusterNamespace, cluster.clusterName, { - workspaceNamespace: workspace.namespace, workspaceId: workspace.id, + workspace.namespace, { notebookNames, playgroundMode: this.isPlaygroundMode() }, {signal: this.aborter.signal})); diff --git a/ui/src/app/pages/analysis/reset-cluster-button.spec.tsx b/ui/src/app/pages/analysis/reset-cluster-button.spec.tsx index cee1a079627..96c4de3aefa 100644 --- a/ui/src/app/pages/analysis/reset-cluster-button.spec.tsx +++ b/ui/src/app/pages/analysis/reset-cluster-button.spec.tsx @@ -19,8 +19,7 @@ describe('ResetClusterButton', () => { beforeEach(() => { props = { - billingProjectId: "billing-project-123", - workspaceFirecloudName: "workspace-name-123" + workspaceNamespace: 'billing-project-123', }; registerApiClient(ClusterApi, new ClusterApiStub()); @@ -45,4 +44,5 @@ describe('ResetClusterButton', () => { expect(spy).toHaveBeenCalled(); expect(wrapper.find('Modal[data-test-id="reset-notebook-modal"]').length).toBe(0); }); + }); diff --git a/ui/src/app/pages/analysis/reset-cluster-button.tsx b/ui/src/app/pages/analysis/reset-cluster-button.tsx index af4b7c5d4fc..fc41ab0b3db 100644 --- a/ui/src/app/pages/analysis/reset-cluster-button.tsx +++ b/ui/src/app/pages/analysis/reset-cluster-button.tsx @@ -4,20 +4,18 @@ import {Button} from 'app/components/buttons'; import {Modal, ModalBody, ModalFooter, ModalTitle} from 'app/components/modals'; import {TooltipTrigger} from 'app/components/popups'; import {clusterApi} from 'app/services/swagger-fetch-clients'; - import { - Cluster, + ClusterInitializationAbortedError, + ClusterInitializationFailedError, + ClusterInitializer, +} from 'app/utils/cluster-initializer'; +import {reportError} from 'app/utils/errors'; +import { ClusterStatus, } from 'generated/fetch/api'; - - -export const TRANSITIONAL_STATUSES = new Set([ - ClusterStatus.Creating, - ClusterStatus.Starting, - ClusterStatus.Stopping, - ClusterStatus.Deleting, -]); +const RESTART_LABEL = 'Reset server'; +const CREATE_LABEL = 'Create server'; const styles = { notebookSettings: { @@ -26,55 +24,132 @@ const styles = { }; export interface Props { - billingProjectId: string; - workspaceFirecloudName: string; + workspaceNamespace: string; } interface State { - cluster: Cluster; + clusterStatus?: ClusterStatus; + isPollingCluster: boolean; resetClusterPending: boolean; resetClusterModal: boolean; - clusterDeletionFailure: boolean; + resetClusterFailure: boolean; } export class ResetClusterButton extends React.Component { - - private pollClusterTimer: NodeJS.Timer; + private aborter = new AbortController(); constructor(props) { super(props); this.state = { - cluster: null, + clusterStatus: null, + isPollingCluster: true, resetClusterPending: false, resetClusterModal: false, - clusterDeletionFailure: true, + resetClusterFailure: true, }; } componentDidMount() { - this.pollCluster(this.props.billingProjectId); + this.createClusterInitializer(false); + } + + async createClusterInitializer(allowClusterActions: boolean) { + const maxActionCount = allowClusterActions ? 1 : 0; + + // Kick off an initializer which will poll for cluster status. + try { + this.setState({isPollingCluster: true, clusterStatus: null}); + await ClusterInitializer.initialize({ + workspaceNamespace: this.props.workspaceNamespace, + onStatusUpdate: (clusterStatus: ClusterStatus) => { + if (this.aborter.signal.aborted) { + // IF we've been unmounted, don't try to update state. + return; + } + this.setState({ + clusterStatus: clusterStatus, + }); + }, + abortSignal: this.aborter.signal, + // For the reset button, we never want to affect the cluster state. With the maxFooCount set + // to zero, the initializer will reject the promise when it reaches a non-transitional state. + maxDeleteCount: maxActionCount, + maxCreateCount: maxActionCount, + maxResumeCount: maxActionCount, + }); + this.setState({isPollingCluster: false}); + } catch (e) { + if (e instanceof ClusterInitializationAbortedError) { + // Silently return if the init was aborted -- we've likely been unmounted and cannot call + // setState anymore. + return; + } else if (e instanceof ClusterInitializationFailedError) { + this.setState({clusterStatus: e.cluster ? e.cluster.status : null, isPollingCluster: false}); + } else { + // We only expect one of the above errors, so report any other types of errors to + // Stackdriver. + reportError(e); + this.setState({ + isPollingCluster: false + }); + } + } } componentWillUnmount() { - if (this.pollClusterTimer) { - clearTimeout(this.pollClusterTimer); + this.aborter.abort(); + } + + private createTooltip(content: React.ReactFragment, children: React.ReactFragment): React.ReactFragment { + return + {children} + ; + } + + private createButton(label: string, enabled: boolean, callback: () => void): React.ReactFragment { + return ; + } + + createButtonAndLabel(): (React.ReactFragment) { + if (this.state.isPollingCluster) { + const tooltipContent =
+ Your notebook server is still being provisioned.
+ {this.state.clusterStatus != null && + (detailed status: {this.state.clusterStatus}) + } +
; + return this.createTooltip( + tooltipContent, + this.createButton(RESTART_LABEL, false, null)); + } else if (this.state.clusterStatus === null) { + // If the initializer has completed and the status is null, it means that + // a cluster doesn't exist for this workspace. + return this.createTooltip( + 'You do not currently have an active notebook server for this workspace.', + this.createButton(CREATE_LABEL, true, () => this.createOrResetCluster())); + } else { + // We usually reach this state if the cluster is at a "terminal" status and the initializer has + // completed. This may be ClusterStatus.Stopped, ClusterStatus.Running, ClusterStatus.Error, + // etc. + const tooltipContent =
+ Your notebook server is in the following state: {this.state.clusterStatus}. +
; + return this.createTooltip( + tooltipContent, + this.createButton(RESTART_LABEL, true, () => this.openResetClusterModal())); } } render() { return
- - - + {this.createButtonAndLabel()}
{this.state.resetClusterModal && { - {this.state.clusterDeletionFailure ? + {this.state.resetClusterFailure ?
Could not reset your notebook server.
: undefined}
@@ -109,41 +184,24 @@ export class ResetClusterButton extends React.Component { this.setState({ resetClusterPending: false, resetClusterModal: true, - clusterDeletionFailure: false + resetClusterFailure: false }); } - resetCluster(): void { - this.setState({ resetClusterPending: true }); - - const clusterBillingProjectId = this.state.cluster.clusterNamespace; - clusterApi().deleteCluster(this.state.cluster.clusterNamespace, this.state.cluster.clusterName) - .then(() => { - this.setState({cluster: null, resetClusterPending: false, resetClusterModal: false}); - this.pollCluster(clusterBillingProjectId); - }) - .catch(() => { - this.setState({resetClusterPending: false, clusterDeletionFailure: true}); - }); - } + async createOrResetCluster(): Promise { + try { + this.setState({resetClusterPending: true}); + if (this.state.clusterStatus === null) { + await clusterApi().createCluster(this.props.workspaceNamespace); + } else { + await clusterApi().deleteCluster(this.props.workspaceNamespace); + } + this.setState({resetClusterPending: false, resetClusterModal: false}); - private pollCluster(billingProjectId): void { - const repoll = () => { - this.pollClusterTimer = setTimeout(() => this.pollCluster(billingProjectId), 15000); - }; - clusterApi().listClusters(billingProjectId, this.props.workspaceFirecloudName) - .then((body) => { - const cluster = body.defaultCluster; - if (TRANSITIONAL_STATUSES.has(cluster.status)) { - repoll(); - return; - } - this.setState({cluster: cluster}); - }) - .catch(() => { - // Also retry on errors - repoll(); - }); + this.createClusterInitializer(true); + + } catch { + this.setState({resetClusterPending: false, resetClusterFailure: true}); + } } } - diff --git a/ui/src/app/pages/workspace/workspace-about.tsx b/ui/src/app/pages/workspace/workspace-about.tsx index 370810c3420..4e3a1ddd7fe 100644 --- a/ui/src/app/pages/workspace/workspace-about.tsx +++ b/ui/src/app/pages/workspace/workspace-about.tsx @@ -241,8 +241,7 @@ export const WorkspaceAbout = fp.flow(withUserProfile(), withUrlParams(), withCd fp.capitalize(workspace.dataAccessLevel.toString()) : 'Loading...'} {!!this.workspaceClusterBillingProjectId() && - } + } {sharing && { + + beforeEach(() => { + jest.useFakeTimers(); + + registerApiClient(ClusterApi, new ClusterApiStub()); + registerApiClientNotebooks(NotebooksClusterApi, new NotebooksClusterApiStub()); + + mockGetCluster = jest.spyOn(clusterApi(), 'getCluster'); + mockCreateCluster = jest.spyOn(clusterApi(), 'createCluster'); + mockDeleteCluster = jest.spyOn(clusterApi(), 'deleteCluster'); + mockStartCluster = jest.spyOn(notebooksClusterApi(), 'startCluster'); + }); + + afterEach(() => { + jest.useRealTimers(); + jest.clearAllMocks(); + }); + + const mockGetClusterCalls = (baseOverrides: Array>) => { + const clusters: Array = baseOverrides.map( + override => { + return {...baseCluster, ...override}; + }); + for (const cluster of clusters) { + mockGetCluster.mockImplementationOnce((workspaceNamespace) => { + return cluster; + }); + } + }; + + /** + * This helper function allows us to call Jest's mock-timer progression function, runAllTimers, + * an arbitrary number of times until the given Promise is settled. + * + * This is helpful for testing the initializer, since it has a Promise-based API but relies on + * a polling strategy using setTimeout calls in order to wait for the cluster to become ready. + */ + const runTimersUntilSettled = async(p: Promise, maxLoops: number = 20) => { + let isSettled = false; + p.then(() => isSettled = true).catch((e) => { + isSettled = true; + }); + let i = 0; + while (!isSettled) { + i++; + if (i > maxLoops) { + throw new Error('Max number of timer cycles reached waiting for Promise to settle'); + } + + await new Promise(setImmediate); + jest.runAllTimers(); + } + }; + + const runInitializerAndTimers = async(options?: Partial, maxLoops?: number): Promise => { + const clusterPromise = ClusterInitializer.initialize({ + workspaceNamespace: 'aou-rw-12345', + ...options + }); + await runTimersUntilSettled(clusterPromise, maxLoops); + return await clusterPromise; + }; + + it('should resolve promise if cluster is in ready state', async() => { + // This tests the simplest case of the initializer. No polling necessary. + mockGetClusterCalls([baseCluster]); + const cluster = await runInitializerAndTimers(); + expect(cluster.status).toEqual(ClusterStatus.Running); + }); + + it('should resume cluster if it is initially stopping / stopped', async() => { + // This test case also includes some repeated statuses, since that is a more realistic + // reflection of how we poll Leo & get occasional new statuses. + mockGetClusterCalls([ + {status: ClusterStatus.Stopping}, + {status: ClusterStatus.Stopping}, + {status: ClusterStatus.Stopped}, + {status: ClusterStatus.Stopped}, + // Here is where we expect, chronologically, a call to startCluster. The next two + // cluster statuses mock out what we might expect to see after that call. + {status: ClusterStatus.Starting}, + {status: ClusterStatus.Starting}, + {status: ClusterStatus.Starting}, + {status: ClusterStatus.Running} + ]); + const cluster = await runInitializerAndTimers(); + expect(mockStartCluster).toHaveBeenCalled(); + expect(cluster.status).toEqual(ClusterStatus.Running); + }); + + it('should call callback with cluster status', async() => { + mockGetClusterCalls([ + {status: ClusterStatus.Stopped}, + ]); + mockGetCluster.mockRejectedValueOnce(new Response(null, {status: 404})); + mockGetClusterCalls([ + {status: ClusterStatus.Starting}, + {status: ClusterStatus.Starting}, + {status: ClusterStatus.Running} + ]); + const statuses = []; + await runInitializerAndTimers({onStatusUpdate: (status) => statuses.push(status)}); + + expect(statuses).toEqual([ + ClusterStatus.Stopped, + // A null value is passed when a cluster is not found. + null, + // Note: onStatusUpdate will be called for every status received, not just when the status + // value is changed. + ClusterStatus.Starting, + ClusterStatus.Starting, + ClusterStatus.Running] + ); + }); + + it('should create cluster if it is initially nonexistent', async() => { + mockGetCluster.mockRejectedValueOnce(new Response(null, {status: 404})); + mockCreateCluster.mockImplementationOnce(async(workspaceNamespace) => { + return {status: ClusterStatus.Creating}; + }); + mockGetClusterCalls([ + {status: ClusterStatus.Starting}, + {status: ClusterStatus.Running} + ]); + const cluster = await runInitializerAndTimers(); + + expect(mockCreateCluster).toHaveBeenCalled(); + expect(cluster.status).toEqual(ClusterStatus.Running); + }); + + it('should delete cluster if in an error state', async() => { + // A cluster in an error state should trigger a deletion request. + mockGetClusterCalls([ + {status: ClusterStatus.Creating}, + {status: ClusterStatus.Error}, + ]); + mockDeleteCluster.mockImplementationOnce(async(workspaceNamespace) => { + return {}; + }); + mockGetClusterCalls([ + {status: ClusterStatus.Deleting}, + {status: ClusterStatus.Deleting}, + ]); + // After some period of "deleting" status, we expect the cluster to become nonexistent... + mockGetCluster.mockRejectedValueOnce(new Response(null, {status: 404})); + // which should trigger a creation request... + mockCreateCluster.mockImplementationOnce(async(workspaceNamespace) => { + return {status: ClusterStatus.Creating}; + }); + // and eventually give us a good cluster. + mockGetClusterCalls([ + {status: ClusterStatus.Starting}, + {status: ClusterStatus.Running} + ]); + + const cluster = await runInitializerAndTimers(); + + expect(mockDeleteCluster).toHaveBeenCalled(); + expect(mockCreateCluster).toHaveBeenCalled(); + expect(cluster.status).toEqual(ClusterStatus.Running); + }); + + it('should recover from intermittent 500s', async() => { + mockGetClusterCalls([ + {status: ClusterStatus.Creating}, + {status: ClusterStatus.Creating}, + ]); + mockGetCluster.mockRejectedValueOnce(new Response(null, {status: 503})); + mockGetCluster.mockRejectedValueOnce(new Response(null, {status: 503})); + mockGetClusterCalls([ + {status: ClusterStatus.Running}, + ]); + + const cluster = await runInitializerAndTimers(); + + expect(cluster.status).toEqual(ClusterStatus.Running); + }); + + it('should give up after too many server errors', async() => { + mockGetClusterCalls([ + {status: ClusterStatus.Creating}, + ]); + for (let i = 0; i < 20; i++) { + mockGetCluster.mockRejectedValueOnce(new Response(null, {status: 503})); + } + + // Tell Jest that we plan to have 1 assertion. This ensures that the test won't + // pass if the promise fails. + expect.assertions(1); + try { + await runInitializerAndTimers(); + } catch (error) { + expect(error.message).toMatch(/max server error count/i); + } + }); + + it('should timeout after max delay', async() => { + mockGetCluster.mockImplementation(async(workspaceNamespace) => { + return {status: ClusterStatus.Starting}; + }); + + // There's some nuance / awkwardness to this test: the ClusterInitializer uses Date.now() to get + // the current timestamp, but Jest doesn't support fake clock functionality (see + // https://github.com/facebook/jest/issues/2684). So we just set a very quick timeout here to + // ensure the threshold is reached after a couple polling loops. + expect.assertions(1); + try { + await runInitializerAndTimers({overallTimeout: 30}, /* maxLoops */ 20000); + } catch (error) { + expect(error.message).toMatch(/max time allowed/i); + } + }); + + it('should reject promise after abort signal', async() => { + mockGetCluster.mockImplementation(async(workspaceNamespace) => { + return {status: ClusterStatus.Starting}; + }); + const aborter = new AbortController(); + + const initializePromise = runInitializerAndTimers({abortSignal: aborter.signal}); + // Wait a reasonably-short amount of time, at least one polling delay period, before sending + // an abort signal. + await new Promise(resolve => setTimeout(resolve, 20)); + aborter.abort(); + + expect.assertions(1); + try { + await initializePromise; + } catch (error) { + expect(error.message).toMatch(/aborted/i); + } + }); + + it('should respect the maxDeleteCount option', async() => { + // Mock out getCluster API responses which simulate a cluster in an error state, which is then + // reset, but ends up in an error state again. This scenario should warrant two deleteCluster + // calls, but the initializer is configured to max out at 1 and should return an error. + mockGetClusterCalls([ + {status: ClusterStatus.Error}, + {status: ClusterStatus.Deleting}, + {status: ClusterStatus.Creating}, + {status: ClusterStatus.Error}, + ]); + mockDeleteCluster.mockImplementation(async(workspaceNamespace) => { + return {}; + }); + mockCreateCluster.mockImplementation(async(workspaceNamespace) => { + return {status: ClusterStatus.Creating}; + }); + + expect.assertions(2); + try { + await runInitializerAndTimers({maxDeleteCount: 1}); + } catch (error) { + expect(mockDeleteCluster).toHaveBeenCalledTimes(1); + expect(error.message).toMatch(/max cluster delete count/i); + } + }); + + it('should respect the maxCreateCount option', async() => { + // Ensure that the initializer won't take action on a NOT_FOUND cluster if the maxCreateCount + // is set to disallow create requests. + mockGetCluster.mockRejectedValueOnce(new Response(null, {status: 404})); + try { + await runInitializerAndTimers({maxCreateCount: 0}); + } catch (error) { + expect(mockCreateCluster).not.toHaveBeenCalled(); + expect(error.message).toMatch(/max cluster create count/i); + } + }); + + it('should respect the maxResumeCount option', async() => { + mockGetClusterCalls([ + {status: ClusterStatus.Stopped}, + ]); + try { + await runInitializerAndTimers({maxResumeCount: 0}); + } catch (error) { + expect(mockCreateCluster).not.toHaveBeenCalled(); + expect(error.message).toMatch(/max cluster resume count/i); + } + }); + +}); diff --git a/ui/src/app/utils/cluster-initializer.tsx b/ui/src/app/utils/cluster-initializer.tsx new file mode 100644 index 00000000000..e180c1e377f --- /dev/null +++ b/ui/src/app/utils/cluster-initializer.tsx @@ -0,0 +1,335 @@ +import {notebooksClusterApi} from 'app/services/notebooks-swagger-fetch-clients'; +import {clusterApi} from 'app/services/swagger-fetch-clients'; +import {isAbortError, reportError} from 'app/utils/errors'; +import {Cluster, ClusterStatus} from 'generated/fetch'; + +// We're only willing to wait 20 minutes total for a cluster to initialize. After that we return +// a rejected promise no matter what. +const DEFAULT_OVERALL_TIMEOUT = 1000 * 60 * 20; +const DEFAULT_INITIAL_POLLING_DELAY = 2000; +const DEFAULT_MAX_POLLING_DELAY = 15000; +// By default, we're willing to retry twice on each of the state-modifying API calls, to allow +// for some resilience to errored-out clusters, while avoiding situations where we end up in an +// endless create-error-delete loop. +const DEFAULT_MAX_CREATE_COUNT = 2; +const DEFAULT_MAX_DELETE_COUNT = 2; +const DEFAULT_MAX_RESUME_COUNT = 2; +// We allow a certain # of server errors to occur before we error-out of the initialization flow. +const DEFAULT_MAX_SERVER_ERROR_COUNT = 10; + +export class ClusterInitializationFailedError extends Error { + constructor(message: string, public readonly cluster?: Cluster) { + super(message); + // Unfortunately, error subclassing is broken in TypeScript without this workaround. See + // https://github.com/Microsoft/TypeScript/wiki/Breaking-Changes#extending-built-ins-like-error-array-and-map-may-no-longer-work + // for details. + Object.setPrototypeOf(this, ClusterInitializationFailedError.prototype); + + this.name = 'ClusterInitializationFailedError'; + } +} + +export class ExceededActionCountError extends ClusterInitializationFailedError { + constructor(message, cluster?: Cluster) { + super(message, cluster); + Object.setPrototypeOf(this, ExceededActionCountError.prototype); + + this.name = 'ExceededActionCountError'; + } +} + +export class ExceededErrorCountError extends ClusterInitializationFailedError { + constructor(message, cluster?: Cluster) { + super(message, cluster); + Object.setPrototypeOf(this, ExceededErrorCountError.prototype); + + this.name = 'ExceededErrorCountError'; + } +} + +export class ClusterInitializationAbortedError extends ClusterInitializationFailedError { + constructor(message, cluster?: Cluster) { + super(message, cluster); + Object.setPrototypeOf(this, ClusterInitializationAbortedError.prototype); + + this.name = 'ClusterInitializationAbortedError'; + } +} + +export interface ClusterInitializerOptions { + // Core options. Most callers should provide these. + // + // The workspace namespace to initialize a cluster for. + workspaceNamespace: string; + // Callback which is called every time the cluster updates its status. When no cluster is found, + // the callback is called with a null value. + onStatusUpdate?: (ClusterStatus?) => void; + // An optional abort signal which allows the caller to abort the initialization process, including + // cancelling any outstanding Ajax requests. + abortSignal?: AbortSignal; + + // Override options. These options all have sensible defaults, but may be overridden for testing + // or special scenarios (such as an initialization flow which should not take any actions). + initialPollingDelay?: number; + maxPollingDelay?: number; + overallTimeout?: number; + maxCreateCount?: number; + maxDeleteCount?: number; + maxResumeCount?: number; + maxServerErrorCount?: number; +} + +const DEFAULT_OPTIONS: Partial = { + onStatusUpdate: () => {}, + initialPollingDelay: DEFAULT_INITIAL_POLLING_DELAY, + maxPollingDelay: DEFAULT_MAX_POLLING_DELAY, + overallTimeout: DEFAULT_OVERALL_TIMEOUT, + maxCreateCount: DEFAULT_MAX_CREATE_COUNT, + maxDeleteCount: DEFAULT_MAX_DELETE_COUNT, + maxResumeCount: DEFAULT_MAX_RESUME_COUNT, + maxServerErrorCount: DEFAULT_MAX_SERVER_ERROR_COUNT +}; + +/** + * A controller class implementing client-side logic to initialize a Leonardo cluster. This class + * will continue to poll the getCluster endpoint, taking certain actions as required to nudge the + * cluster towards a running state, and will eventually resolve the Promise with a running cluster, + * or otherwise reject it with information about the failure mode. + * + * This is an unusually heavyweight controller class on the client side. It's worth noting a couple + * reasons why we ended up with this design: + * - Cluster initialization can take up to 10 minutes, which is beyond the scope of a single + * App Engine server-side request timeout. To reliably implement this control on the server side + * would likely require new database persistence, tasks queues, and additional APIs in order to + * provide the client with status updates. + * - Ultimately, we might expect this type of functionality ("Get me a cluster for workspace X and + * bring it to a running state") to exist as part of the Leonardo application. So rather than + * build our own server-side equivalent, we adopted a client-side solution as a holdover. + */ +export class ClusterInitializer { + // Core properties for interacting with the caller and the cluster APIs. + private readonly workspaceNamespace: string; + private readonly onStatusUpdate: (ClusterStatus?) => void; + private readonly abortSignal?: AbortSignal; + + // Properties to track & control the polling loop. We use a capped exponential backoff strategy + // and a series of "maxFoo" limits to ensure the initialization flow doesn't get out of control. + private readonly maxDelay: number; + private readonly overallTimeout: number; + private readonly maxCreateCount: number; + private readonly maxDeleteCount: number; + private readonly maxResumeCount: number; + private readonly maxServerErrorCount: number; + + // Properties to track progress, actions taken, and errors encountered. + private currentDelay: number; + private createCount = 0; + private deleteCount = 0; + private resumeCount = 0; + private serverErrorCount = 0; + private initializeStartTime?: number; + // The latest cluster retrieved from getCluster. If the last getCluster call returned a NOT_FOUND + // response, this will be null. + private currentCluster?: Cluster; + + // Properties to control the initialization and promise resolution flow. + // + // The resolve and reject function from the promise returned from the call to .run(). We use a + // deferred-style approach in this class, which allows us to provide a Promise-based API on the + // .run() method, but to call the resolve() or reject() method from anywhere in this class. + private resolve: (cluster?: Cluster | PromiseLike) => void; + private reject: (error: Error) => void; + + /** + * Creates and runs a cluster initializer. This is the main public entry point to this class. + * @param options + */ + public static initialize(options: ClusterInitializerOptions): Promise { + return new ClusterInitializer(options).run(); + } + + private constructor(options: ClusterInitializerOptions) { + // Assign default values to certain options, which will be overridden by the input options + // if present. + options = {...DEFAULT_OPTIONS, ...options}; + + this.workspaceNamespace = options.workspaceNamespace; + this.onStatusUpdate = options.onStatusUpdate ? options.onStatusUpdate : () => {}; + this.abortSignal = options.abortSignal; + this.currentDelay = options.initialPollingDelay; + this.maxDelay = options.maxPollingDelay; + this.overallTimeout = options.overallTimeout; + this.maxCreateCount = options.maxCreateCount; + this.maxDeleteCount = options.maxDeleteCount; + this.maxResumeCount = options.maxResumeCount; + this.maxServerErrorCount = options.maxServerErrorCount; + } + + private async getCluster(): Promise { + return await clusterApi().getCluster(this.workspaceNamespace, {signal: this.abortSignal}); + } + + private async createCluster(): Promise { + if (this.createCount >= this.maxCreateCount) { + throw new ExceededActionCountError( + `Reached max cluster create count (${this.maxCreateCount})`, this.currentCluster); + } + const cluster = await clusterApi().createCluster(this.workspaceNamespace, {signal: this.abortSignal}); + this.createCount++; + return cluster; + } + + private async resumeCluster(): Promise { + if (this.resumeCount >= this.maxResumeCount) { + throw new ExceededActionCountError( + `Reached max cluster resume count (${this.maxResumeCount})`, this.currentCluster); + } + await notebooksClusterApi().startCluster( + this.currentCluster.clusterNamespace, this.currentCluster.clusterName, {signal: this.abortSignal}); + this.resumeCount++; + } + + private async deleteCluster(): Promise { + if (this.deleteCount >= this.maxDeleteCount) { + throw new ExceededActionCountError( + `Reached max cluster delete count (${this.maxDeleteCount})`, this.currentCluster); + } + await clusterApi().deleteCluster(this.workspaceNamespace, {signal: this.abortSignal}); + this.deleteCount++; + } + + private isClusterRunning(): boolean { + return this.currentCluster && this.currentCluster.status === ClusterStatus.Running; + } + + private isClusterStopped(): boolean { + return this.currentCluster && this.currentCluster.status === ClusterStatus.Stopped; + } + + private isClusterErrored(): boolean { + return this.currentCluster && this.currentCluster.status === ClusterStatus.Error; + } + + private isNotFoundError(e: any): boolean { + // Our Swagger-generated APIs throw an error of type Response on a non-success status code. + return e instanceof Response && e.status === 404; + } + + private handleUnknownError(e: any) { + if (e instanceof Response && e.status >= 500 && e.status < 600) { + this.serverErrorCount++; + } + reportError(e); + } + + private hasTooManyServerErrors(): boolean { + return this.serverErrorCount > this.maxServerErrorCount; + } + + /** + * Runs the cluster intiailizer flow. + * + * The strategy here is to poll the getCluster endpoint for cluster status, waiting for the + * cluster to reach the ready state (ClusterStatus.Running) or an error state which can be + * recovered from. Action will be taken where possible: a stopped cluster will trigger a call to + * startCluster, a nonexistent cluster will trigger a call to createCluster, and an errored + * cluster will trigger a call to deleteCluster in an attempt to retry cluster creation. + * + * @return A Promise which resolves with a Cluster or rejects with a + * ClusterInitializationFailedError, which holds a message and the current Cluster object (if one + * existed at the time of failure). + */ + private async run(): Promise { + this.initializeStartTime = Date.now(); + + return new Promise((resolve, reject) => { + this.resolve = resolve; + this.reject = reject as (error: Error) => {}; + this.poll(); + }) as Promise; + } + + private async poll() { + // Overall strategy: continue polling the get-cluster endpoint, with capped exponential backoff, + // until we either reach our goal state (a RUNNING cluster) or run up against the overall + // timeout threshold. + // + // Certain cluster states require active intervention, such as deleting or resuming the cluster; + // these are handled within the the polling loop. + if (this.abortSignal && this.abortSignal.aborted) { + // We'll bail out early if an abort signal was triggered while waiting for the poll cycle. + return this.reject( + new ClusterInitializationFailedError('Request was aborted', this.currentCluster)); + } + if (Date.now() - this.initializeStartTime > this.overallTimeout) { + return this.reject( + new ClusterInitializationFailedError( + `Initialization attempt took longer than the max time allowed (${this.overallTimeout}ms)`, + this.currentCluster)); + } + + // Fetch the current cluster status, with some graceful error handling for NOT_FOUND response + // and abort signals. + try { + this.currentCluster = await this.getCluster(); + this.onStatusUpdate(this.currentCluster.status); + } catch (e) { + if (isAbortError(e)) { + return this.reject( + new ClusterInitializationAbortedError('Abort signal received during cluster API call', + this.currentCluster)); + } else if (this.isNotFoundError(e)) { + // A not-found error is somewhat expected, if a cluster has recently been deleted or + // hasn't been created yet. + this.currentCluster = null; + this.onStatusUpdate(null); + } else { + this.handleUnknownError(e); + if (this.hasTooManyServerErrors()) { + return this.reject( + new ExceededErrorCountError( + `Reached max server error count (${this.maxServerErrorCount})`, this.currentCluster)); + } + } + } + + // Attempt to take the appropriate next action given the current cluster status. + try { + if (this.currentCluster === null) { + await this.createCluster(); + } else if (this.isClusterStopped()) { + await this.resumeCluster(); + } else if (this.isClusterErrored()) { + // If cluster is in error state, delete it so it can be re-created at the next poll loop. + reportError( + `Cluster ${this.currentCluster.clusterNamespace}/${this.currentCluster.clusterName}` + + ` has reached an ERROR status`); + await this.deleteCluster(); + } else if (this.isClusterRunning()) { + // We've reached the goal - resolve the Promise. + return this.resolve(this.currentCluster); + } + } catch (e) { + if (isAbortError(e)) { + return this.reject( + new ClusterInitializationFailedError('Abort signal received during cluster API call', + this.currentCluster)); + } else if (e instanceof ExceededActionCountError) { + // This is a signal that we should hard-abort the polling loop due to reaching the max + // number of delete or create actions allowed. + return this.reject(e); + } else { + this.handleUnknownError(e); + if (this.hasTooManyServerErrors()) { + return this.reject( + new ExceededErrorCountError( + `Reached max server error count (${this.maxServerErrorCount})`, this.currentCluster)); + } + } + } + + setTimeout(() => this.poll(), this.currentDelay); + // Increment capped exponential backoff for the next poll loop. + this.currentDelay = Math.min(this.currentDelay * 1.3, this.maxDelay); + } +} diff --git a/ui/src/app/utils/errors.tsx b/ui/src/app/utils/errors.tsx index 10c79cc2933..e38aefd6795 100644 --- a/ui/src/app/utils/errors.tsx +++ b/ui/src/app/utils/errors.tsx @@ -14,8 +14,8 @@ export function setStackdriverErrorReporter(reporter: StackdriverErrorReporter) /** * Reports an error to Stackdriver error logging, if enabled. */ -export function reportError(err: Error) { - console.error(err); +export function reportError(err: (Error|string)) { + console.error('Reporting error to Stackdriver: ', err); if (stackdriverReporter) { stackdriverReporter.report(err, (e) => { // Note: this does not detect non-200 responses from Stackdriver: @@ -29,7 +29,7 @@ export function reportError(err: Error) { /** Returns true if the given error is an AbortError, as used in fetch() aborts. */ export function isAbortError(e: Error) { - return (e instanceof DOMException) && e.name === 'AbortError'; + return e instanceof DOMException && e.name === 'AbortError'; } // convert error response from API JSON to ErrorResponse object, otherwise, report parse error diff --git a/ui/src/testing/stubs/cluster-api-stub.ts b/ui/src/testing/stubs/cluster-api-stub.ts index a5709608002..e89d9086719 100644 --- a/ui/src/testing/stubs/cluster-api-stub.ts +++ b/ui/src/testing/stubs/cluster-api-stub.ts @@ -3,8 +3,7 @@ import { ClusterApi, ClusterLocalizeRequest, ClusterLocalizeResponse, - ClusterStatus, - DefaultClusterResponse, + ClusterStatus } from 'generated/fetch'; export class ClusterApiStub extends ClusterApi { @@ -20,21 +19,26 @@ export class ClusterApiStub extends ClusterApi { }; } - listClusters(extraHttpRequestParams?: any): Promise { - return new Promise(resolve => { - resolve({defaultCluster: this.cluster}); + getCluster(workspaceNamespace: string, options?: any): Promise { + return new Promise(resolve => { + resolve(this.cluster); }); } - deleteCluster(projectName: string, clusterName: string, - extraHttpRequestParams?: any): Promise<{}> { + createCluster(workspaceNamespace: string, options?: any): Promise { + return new Promise(resolve => { + resolve(this.cluster); + }); + } + + deleteCluster(workspaceNamespace: string, options?: any): Promise<{}> { return new Promise<{}>(resolve => { this.cluster.status = ClusterStatus.Deleting; resolve({}); }); } - localize(projectName: string, clusterName: string, req: ClusterLocalizeRequest, + localize(projectName: string, req: ClusterLocalizeRequest, extraHttpRequestParams?: any): Promise { return new Promise(resolve => { resolve({clusterLocalDirectory: 'workspaces/${req.workspaceId}'}); diff --git a/ui/src/testing/stubs/cluster-service-stub.ts b/ui/src/testing/stubs/cluster-service-stub.ts index 06f97740287..b1b3319c199 100644 --- a/ui/src/testing/stubs/cluster-service-stub.ts +++ b/ui/src/testing/stubs/cluster-service-stub.ts @@ -5,7 +5,8 @@ import { ClusterLocalizeRequest, ClusterLocalizeResponse, ClusterStatus, - DefaultClusterResponse, + CreateClusterResponse, + GetClusterResponse, } from 'generated'; export class ClusterServiceStub { @@ -21,16 +22,25 @@ export class ClusterServiceStub { }; } - listClusters(extraHttpRequestParams?: any): Observable { - return new Observable(observer => { + getCluster(extraHttpRequestParams?: any): Observable { + return new Observable(observer => { setTimeout(() => { - observer.next({defaultCluster: this.cluster}); + observer.next({cluster: this.cluster}); observer.complete(); }, 0); }); } - localize(projectName: string, clusterName: string, req: ClusterLocalizeRequest, + createCluster(extraHttpRequestParams?: any): Observable { + return new Observable(observer => { + setTimeout(() => { + observer.next({cluster: this.cluster}); + observer.complete(); + }, 0); + }); + } + + localize(projectName: string, req: ClusterLocalizeRequest, extraHttpRequestParams?: any): Observable<{}> { return new Observable(observer => { setTimeout(() => { @@ -42,8 +52,7 @@ export class ClusterServiceStub { }); } - deleteCluster(projectName: string, clusterName: string, - extraHttpRequestParams?: any): Observable<{}> { + deleteCluster(projectName: string, extraHttpRequestParams?: any): Observable<{}> { return new Observable<{}>(observer => { setTimeout(() => { observer.next({});