Skip to content

Commit

Permalink
Add a command to create account level groups if they do not exist (#763)
Browse files Browse the repository at this point in the history
Attempt to fix 
- #17
- #649

Adds a command to create groups at account level by crawling all
workspaces configured in the account and in scope of the migration

This pull request adds several new methods to the `account.py` file in
the `databricks/labs/ucx` directory. The main method added is
`create_account_level_groups`, which crawls all workspaces in an account
and creates account-level groups if a workspace-local group is not
present in the account. The method `get_valid_workspaces_groups` is
added to retrieve a dictionary of all valid workspace groups, while
`has_not_same_members` checks if two groups have the same members. The
method `get_account_groups` retrieves a dictionary of all account
groups.

Regarding the tests, the `test_account.py` file has been updated to
include new tests for the `create_account_level_groups` method. The test
`test_create_acc_groups_should_create_acc_group_if_no_group_found`
verifies that an account-level group is created if no group with the
same name is found. The test
`test_create_acc_groups_should_filter_groups_in_other_workspaces` checks
that the method filters groups present in other workspaces and only
creates groups that are not present in the account.

Additionally, the `cli.py` file has been updated to include a new
command, `create_account_level_groups`, which uploads workspace config
to all workspaces in the account where ucx is installed.
  • Loading branch information
william-conti authored Feb 28, 2024
1 parent 76bdff9 commit f4efea5
Show file tree
Hide file tree
Showing 7 changed files with 627 additions and 10 deletions.
16 changes: 16 additions & 0 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -26,6 +26,7 @@ See [contributing instructions](CONTRIBUTING.md) to help improve this project.
* [Producing table mapping](#producing-table-mapping)
* [Synchronising UCX configurations](#synchronising-ucx-configurations)
* [Validating group membership](#validating-group-membership)
* [Creating account groups](#creating-account-groups)
* [Star History](#star-history)
* [Project Support](#project-support)
<!-- TOC -->
Expand Down Expand Up @@ -197,6 +198,21 @@ Use to validate workspace-level & account-level groups to identify any discrepan
databricks labs ucx validate-groups-membership
```

### Creating account groups
Crawl all workspaces configured in workspace_ids, then creates account level groups if a WS local group is not present
in the account.
If workspace_ids is not specified, it will create account groups for all workspaces configured in the account.

The following scenarios are supported, if a group X:
- Exist in workspaces A,B,C and it has same members in there, it will be created in the account
- Exist in workspaces A,B but not in C, it will be created in the account
- Exist in workspaces A,B,C. It has same members in A,B, but not in C. Then, X and C_X will be created in the
account

```commandline
databricks labs ucx create-account-groups --workspace_ids <comma separated list of workspace id>
```

## Star History

[![Star History Chart](https://api.star-history.com/svg?repos=databrickslabs/ucx&type=Date)](https://star-history.com/#databrickslabs/ucx)
Expand Down
9 changes: 9 additions & 0 deletions labs.yml
Original file line number Diff line number Diff line change
Expand Up @@ -117,3 +117,12 @@ commands:
- name: migrate_credentials
description: Migrate credentials for storage access to UC storage credential

- name: create-account-groups
is_account_level: true
description: |
Creates account level groups for all groups in workspaces provided in workspace_ids.
If workspace_ids is not provided, it will use all workspaces present in the account.
flags:
- name: workspace_ids
description: List of workspace IDs to create account groups from.
139 changes: 130 additions & 9 deletions src/databricks/labs/ucx/account.py
Original file line number Diff line number Diff line change
Expand Up @@ -6,10 +6,9 @@
from databricks.labs.blueprint.tui import Prompts
from databricks.sdk import AccountClient, WorkspaceClient
from databricks.sdk.errors import NotFound
from databricks.sdk.service.iam import ComplexValue, Group, Patch, PatchOp, PatchSchema
from databricks.sdk.service.provisioning import Workspace

from databricks.labs.ucx.__about__ import __version__

logger = logging.getLogger(__name__)


Expand Down Expand Up @@ -37,13 +36,7 @@ def _get_cloud(self) -> str:
return "aws"

def client_for(self, workspace: Workspace) -> WorkspaceClient:
config = self._ac.config.as_dict()
if "databricks_cli_path" in config:
del config["databricks_cli_path"]
cloud = self._get_cloud()
# copy current config and swap with a host relevant to a workspace
config["host"] = f"https://{workspace.deployment_name}.{self._tlds[cloud]}"
return self._new_workspace_client(**config, product="ucx", product_version=__version__)
return self._ac.get_workspace_client(workspace)

def workspace_clients(self) -> list[WorkspaceClient]:
"""
Expand All @@ -69,6 +62,134 @@ def sync_workspace_info(self):
for installation in Installation.existing(ws, "ucx"):
installation.save(workspaces, filename=self.SYNC_FILE_NAME)

def create_account_level_groups(self, prompts: Prompts, workspace_ids: list[int] | None = None):
acc_groups = self._get_account_groups()
workspace_ids = self._get_valid_workspaces_ids(workspace_ids)
all_valid_workspace_groups = self._get_valid_workspaces_groups(prompts, workspace_ids)

for group_name, valid_group in all_valid_workspace_groups.items():
if group_name in acc_groups:
logger.info(f"Group {group_name} already exist in the account, ignoring")
continue

acc_group = self._ac.groups.create(display_name=group_name)

if not valid_group.members or not acc_group.id:
continue
if len(valid_group.members) > 0:
self._add_members_to_acc_group(self._ac, acc_group.id, group_name, valid_group)
logger.info(f"Group {group_name} created in the account")

def _get_valid_workspaces_ids(self, workspace_ids: list[int] | None = None) -> list[int]:
if not workspace_ids:
logger.info("No workspace ids provided, using current workspace instead")
return [self._new_workspace_client().get_workspace_id()]

all_workspace_ids = [workspace.workspace_id for workspace in self._workspaces()]

valid_workspace_ids = []
for workspace_id in workspace_ids:
if workspace_id in all_workspace_ids:
valid_workspace_ids.append(workspace_id)
else:
logger.info(f"Workspace id {workspace_id} not found on the account")

if not valid_workspace_ids:
raise ValueError("No workspace ids provided in the configuration found in the account")

workspace_ids_str = ','.join(str(x) for x in valid_workspace_ids)
logger.info(f"Creating account groups for workspaces IDs : {workspace_ids_str}")
return valid_workspace_ids

def _add_members_to_acc_group(
self, acc_client: AccountClient, acc_group_id: str, group_name: str, valid_group: Group
):
for chunk in self._chunks(valid_group.members, 20):
logger.debug(f"Adding {len(chunk)} members to acc group {group_name}")
acc_client.groups.patch(
acc_group_id,
operations=[Patch(op=PatchOp.ADD, path="members", value=[x.as_dict() for x in chunk])],
schemas=[PatchSchema.URN_IETF_PARAMS_SCIM_API_MESSAGES_2_0_PATCH_OP],
)

def _chunks(self, lst, chunk_size):
"""Yield successive n-sized chunks from lst."""
for i in range(0, len(lst), chunk_size):
yield lst[i : i + chunk_size]

def _get_valid_workspaces_groups(self, prompts: Prompts, workspace_ids: list[int]) -> dict[str, Group]:
all_workspaces_groups: dict[str, Group] = {}

for workspace in self._workspaces():
if workspace.workspace_id not in workspace_ids:
continue
client = self.client_for(workspace)
logger.info(f"Crawling groups in workspace {client.config.host}")

ws_group_ids = client.groups.list(attributes="id")
for group_id in ws_group_ids:
if not group_id.id:
continue

full_workspace_group = client.groups.get(group_id.id)
group_name = full_workspace_group.display_name

if self._is_group_out_of_scope(full_workspace_group):
continue

if group_name in all_workspaces_groups:
if self._has_same_members(all_workspaces_groups[group_name], full_workspace_group):
logger.info(f"Workspace group {group_name} already found, ignoring")
continue

if prompts.confirm(
f"Group {group_name} does not have the same amount of members "
f"in workspace {client.config.host} than previous workspaces which contains the same group name,"
f"it will be created at the account with name : {workspace.workspace_name}_{group_name}"
):
all_workspaces_groups[f"{workspace.workspace_name}_{group_name}"] = full_workspace_group
continue

if not group_name:
continue

logger.info(f"Found new group {group_name}")
all_workspaces_groups[group_name] = full_workspace_group

logger.info(f"Found a total of {len(all_workspaces_groups)} groups to migrate to the account")

return all_workspaces_groups

def _is_group_out_of_scope(self, group: Group) -> bool:
if group.display_name in {"users", "admins", "account users"}:
logger.debug(f"Group {group.display_name} is a system group, ignoring")
return True
meta = group.meta
if not meta:
return False
if meta.resource_type != "WorkspaceGroup":
logger.debug(f"Group {group.display_name} is an account group, ignoring")
return True
return False

def _has_same_members(self, group_1: Group, group_2: Group) -> bool:
ws_members_set_1 = set([m.display for m in group_1.members] if group_1.members else [])
ws_members_set_2 = set([m.display for m in group_2.members] if group_2.members else [])
return not bool((ws_members_set_1 - ws_members_set_2).union(ws_members_set_2 - ws_members_set_1))

def _get_account_groups(self) -> dict[str | None, list[ComplexValue] | None]:
logger.debug("Listing groups in account")
acc_groups = {}
for acc_grp_id in self._ac.groups.list(attributes="id"):
if not acc_grp_id.id:
continue
full_account_group = self._ac.groups.get(acc_grp_id.id)
logger.debug(f"Found account group {acc_grp_id.display_name}")
acc_groups[full_account_group.display_name] = full_account_group.members

logger.info(f"{len(acc_groups)} account groups found")
return acc_groups


class WorkspaceInfo:
def __init__(self, installation: Installation, ws: WorkspaceClient):
Expand Down
19 changes: 19 additions & 0 deletions src/databricks/labs/ucx/cli.py
Original file line number Diff line number Diff line change
Expand Up @@ -90,6 +90,25 @@ def sync_workspace_info(a: AccountClient):
workspaces.sync_workspace_info()


@ucx.command(is_account=True)
def create_account_groups(a: AccountClient, workspace_ids: list[int] | None = None):
"""
Crawl all workspaces configured in workspace_ids, then creates account level groups if a WS local group is not present
in the account.
If workspace_ids is not specified, it will create account groups for all workspaces configured in the account.
The following scenarios are supported, if a group X:
- Exist in workspaces A,B,C and it has same members in there, it will be created in the account
- Exist in workspaces A,B but not in C, it will be created in the account
- Exist in workspaces A,B,C. It has same members in A,B, but not in C. Then, X and C_X will be created in the
account
"""
logger.info(f"Account ID: {a.config.account_id}")
prompts = Prompts()
workspaces = AccountWorkspaces(a)
workspaces.create_account_level_groups(prompts, workspace_ids)


@ucx.command
def manual_workspace_info(w: WorkspaceClient):
"""only supposed to be run if cannot get admins to run `databricks labs ucx sync-workspace-info`"""
Expand Down
28 changes: 28 additions & 0 deletions tests/integration/test_account.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,28 @@
from databricks.labs.blueprint.tui import MockPrompts
from databricks.sdk import AccountClient
from databricks.sdk.errors import NotFound

from databricks.labs.ucx.account import AccountWorkspaces


def test_create_account_level_groups(make_ucx_group, make_group, make_user, acc, ws, make_random):
suffix = make_random()
make_ucx_group(f"test_ucx_migrate_invalid_{suffix}", f"test_ucx_migrate_invalid_{suffix}")

make_group(display_name=f"regular_group_{suffix}", members=[make_user().id])
AccountWorkspaces(acc).create_account_level_groups(MockPrompts({}), [ws.get_workspace_id()])

results = []
for grp in acc.groups.list():
if grp.display_name in {f"regular_group_{suffix}"}:
results.append(grp)
try_delete_group(acc, grp.id) # Avoids flakiness for future runs

assert len(results) == 1


def try_delete_group(acc: AccountClient, grp_id: str):
try:
acc.groups.delete(grp_id)
except NotFound:
pass
Loading

0 comments on commit f4efea5

Please sign in to comment.