Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Fixed the entitlements application for account-level groups #529

Merged
merged 10 commits into from
Nov 3, 2023
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
27 changes: 18 additions & 9 deletions src/databricks/labs/ucx/runtime.py
Original file line number Diff line number Diff line change
Expand Up @@ -18,7 +18,10 @@
from databricks.labs.ucx.hive_metastore.data_objects import ExternalLocationCrawler
from databricks.labs.ucx.hive_metastore.mounts import Mounts
from databricks.labs.ucx.workspace_access.generic import WorkspaceListing
from databricks.labs.ucx.workspace_access.groups import GroupManager
from databricks.labs.ucx.workspace_access.groups import (
GroupManager,
GroupMigrationState,
)
from databricks.labs.ucx.workspace_access.manager import PermissionManager

logger = logging.getLogger(__name__)
Expand Down Expand Up @@ -367,14 +370,16 @@ def apply_permissions_to_backup_groups(cfg: WorkspaceConfig):
logger.info("Skipping group migration as no groups were found.")
return

backend = RuntimeBackend()
permission_manager = PermissionManager.factory(
ws,
RuntimeBackend(),
backend,
cfg.inventory_database,
num_threads=cfg.num_threads,
workspace_start_path=cfg.workspace_start_path,
)
permission_manager.apply_group_permissions(group_manager.migration_state, destination="backup")
group_manager.migration_state.persist_migration_state(backend, cfg.inventory_database)


@task("003-replace-workspace-local-with-account-groups", depends_on=[apply_permissions_to_backup_groups])
Expand All @@ -383,11 +388,11 @@ def replace_workspace_groups_with_account_groups(cfg: WorkspaceConfig):
- Creates an account-level group with the original name of the workspace-local one"""
ws = WorkspaceClient(config=cfg.to_databricks_config())
group_manager = GroupManager(ws, cfg.groups)
group_manager.prepare_groups_in_environment()
if not group_manager.has_groups():
remote_state = GroupMigrationState().fetch_migration_state(RuntimeBackend(), cfg.inventory_database)
william-conti marked this conversation as resolved.
Show resolved Hide resolved
if len(remote_state.groups) == 0:
logger.info("Skipping group migration as no groups were found.")
return
group_manager.replace_workspace_groups_with_account_groups()
group_manager.replace_workspace_groups_with_account_groups(remote_state)


@task(
Expand All @@ -406,15 +411,19 @@ def apply_permissions_to_account_groups(cfg: WorkspaceConfig):

See [interactive tutorial here](https://app.getreprise.com/launch/myM3VNn/)."""
ws = WorkspaceClient(config=cfg.to_databricks_config())
backend = RuntimeBackend()

migration_state = GroupManager.prepare_apply_permissions_to_account_groups(ws, cfg.groups.backup_group_prefix)
if len(migration_state) == 0:
remote_state = GroupMigrationState().fetch_migration_state(backend, cfg.inventory_database)
migration_state = GroupManager.prepare_apply_permissions_to_account_groups(
ws, remote_state, cfg.groups.backup_group_prefix
)
if len(migration_state.groups) == 0:
logger.info("Skipping group migration as no groups were found.")
return

permission_manager = PermissionManager.factory(
ws,
RuntimeBackend(),
backend,
cfg.inventory_database,
num_threads=cfg.num_threads,
workspace_start_path=cfg.workspace_start_path,
Expand All @@ -436,7 +445,7 @@ def delete_backup_groups(cfg: WorkspaceConfig):
def destroy_schema(cfg: WorkspaceConfig):
"""This _clean-up_ workflow allows to removes the `$inventory` database, with all the inventory tables created by
the previous workflow runs. Use this to reset the entire state and start with the assessment step again."""
RuntimeBackend().execute(f"DROP DATABASE IF EXISTS {cfg.inventory_database} CASCADE")
RuntimeBackend().execute(f"DROP DATABASE {cfg.inventory_database} CASCADE")


def main(*argv):
Expand Down
67 changes: 58 additions & 9 deletions src/databricks/labs/ucx/workspace_access/groups.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,7 +2,7 @@
import json
import logging
import typing
from dataclasses import dataclass
from dataclasses import dataclass, replace

from databricks.sdk import WorkspaceClient
from databricks.sdk.core import DatabricksError
Expand All @@ -11,18 +11,27 @@
from databricks.sdk.service.iam import Group

from databricks.labs.ucx.config import GroupsConfig
from databricks.labs.ucx.framework.crawlers import SqlBackend
from databricks.labs.ucx.mixins.hardening import rate_limited

logger = logging.getLogger(__name__)

GroupLevel = typing.Literal["workspace", "account"]


# TODO: This class is used to persist MigrationGroupInfo, but Group is a not supported type for backend.save_table()
@dataclass
class MigrationGroupInfoMock:
workspace: str = None
backup: str = None
account: str = None


@dataclass
class MigrationGroupInfo:
workspace: Group
backup: Group
account: Group
workspace: Group = None
backup: Group = None
account: Group = None

def is_name_match(self, name: str) -> bool:
if self.workspace is not None:
Expand Down Expand Up @@ -54,6 +63,36 @@ def add(self, ws_group: Group, backup_group: Group, acc_group: Group):
mgi = MigrationGroupInfo(workspace=ws_group, backup=backup_group, account=acc_group)
self.groups.append(mgi)

def persist_migration_state(self, backend: SqlBackend, inventory_database: str):
rows = []
for group in self.groups:
account = self.group_to_str(group.account) if group.account else None
backup = self.group_to_str(group.backup) if group.backup else None
workspace = self.group_to_str(group.workspace) if group.workspace else None
rows.append(MigrationGroupInfoMock(workspace, backup, account))
logger.info(f"Persisting {len(rows)} to migration state hive_metastore.{inventory_database}.migration_state")
backend.save_table(f"hive_metastore.{inventory_database}.migration_state", rows, MigrationGroupInfoMock)

def group_to_str(self, group: Group):
if group.schemas:
# TODO: Remove this when https://github.com/databricks/databricks-sdk-py/issues/420 is done
group = replace(group, schemas=None)
return json.dumps(group.as_dict())

def fetch_migration_state(self, backend: SqlBackend, inventory_database: str):
migration_group_infos = backend.fetch(f"SELECT * FROM hive_metastore.{inventory_database}.migration_state")

state = GroupMigrationState()
for workspace, backup, account in migration_group_infos:
workspace_group = Group().from_dict(json.loads(workspace)) if workspace else None
backup_group = Group().from_dict(json.loads(backup)) if backup else None
account_group = Group().from_dict(json.loads(account)) if account else None
state.add(workspace_group, backup_group, account_group)
logger.info(
f"{len(list(state.groups))} found to migration state hive_metastore.{inventory_database}.migration_state"
)
return state

def is_in_scope(self, name: str) -> bool:
if name is None:
return False
Expand Down Expand Up @@ -118,13 +157,19 @@ def __init__(self, ws: WorkspaceClient, groups: GroupsConfig):
self._workspace_groups = self._list_workspace_groups()

@classmethod
def prepare_apply_permissions_to_account_groups(cls, ws: WorkspaceClient, backup_group_prefix: str = "db-temp-"):
def prepare_apply_permissions_to_account_groups(
cls, ws: WorkspaceClient, remote_state: GroupMigrationState, backup_group_prefix: str = "db-temp-"
):
scim_attributes = "id,displayName,meta"

account_groups_by_name = {}
for g in cls._list_account_groups(ws, scim_attributes):
account_groups_by_name[g.display_name] = g

remote_state_by_backup_group = {}
for g in remote_state.groups:
remote_state_by_backup_group[(g.backup.display_name, g.account.display_name)] = g

workspace_groups_by_name = {}
account_groups_in_workspace_by_name = {}
for g in ws.groups.list(attributes=scim_attributes):
Expand All @@ -144,9 +189,13 @@ def prepare_apply_permissions_to_account_groups(cls, ws: WorkspaceClient, backup
if backup_group_name not in workspace_groups_by_name:
logger.info(f"Skipping account group `{display_name}`: no backup group in workspace")
continue
if (backup_group_name, display_name) not in remote_state_by_backup_group:
logger.info(f"Skipping account group `{display_name}`: not present in the state")
continue
workspace_group = remote_state_by_backup_group[(backup_group_name, display_name)].workspace
backup_group = workspace_groups_by_name[backup_group_name]
account_group = account_groups_in_workspace_by_name[display_name]
migration_state.add(None, backup_group, account_group)
migration_state.add(workspace_group, backup_group, account_group)

return migration_state

Expand All @@ -168,12 +217,12 @@ def migration_state(self) -> GroupMigrationState:
logger.info("No groups were loaded or initialized, nothing to do")
return self._migration_state

def replace_workspace_groups_with_account_groups(self):
def replace_workspace_groups_with_account_groups(self, migration_state: GroupMigrationState):
logger.info("Replacing the workspace groups with account-level groups")
if len(self._migration_state) == 0:
if len(migration_state) == 0:
logger.info("No groups were loaded or initialized, nothing to do")
return
for migration_info in self.migration_state.groups:
for migration_info in migration_state.groups:
self._replace_group(migration_info)
logger.info("Workspace groups were successfully replaced with account-level groups")

Expand Down
4 changes: 2 additions & 2 deletions src/databricks/labs/ucx/workspace_access/migration.py
Original file line number Diff line number Diff line change
Expand Up @@ -100,8 +100,8 @@ def apply_permissions_to_backup_groups(self):
def verify_permissions_on_backup_groups(self, to_verify):
self._verification_manager.verify(self._group_manager.migration_state, "backup", to_verify)

def replace_workspace_groups_with_account_groups(self):
self._group_manager.replace_workspace_groups_with_account_groups()
def replace_workspace_groups_with_account_groups(self, migration_state):
self._group_manager.replace_workspace_groups_with_account_groups(migration_state)

def apply_permissions_to_account_groups(self):
self._permissions_manager.apply_group_permissions(self._group_manager.migration_state, destination="account")
Expand Down
52 changes: 49 additions & 3 deletions tests/integration/workspace_access/test_groups.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,7 +3,12 @@

from databricks.sdk import WorkspaceClient
from databricks.sdk.retries import retried
from databricks.sdk.service.iam import PermissionLevel
from databricks.sdk.service.iam import (
ComplexValue,
Group,
PermissionLevel,
ResourceMeta,
)

from databricks.labs.ucx.config import GroupsConfig
from databricks.labs.ucx.hive_metastore import GrantsCrawler, TablesCrawler
Expand All @@ -12,7 +17,10 @@
GenericPermissionsSupport,
Listing,
)
from databricks.labs.ucx.workspace_access.groups import GroupManager
from databricks.labs.ucx.workspace_access.groups import (
GroupManager,
GroupMigrationState,
)
from databricks.labs.ucx.workspace_access.manager import PermissionManager
from databricks.labs.ucx.workspace_access.tacl import TableAclSupport

Expand Down Expand Up @@ -129,7 +137,7 @@ def check_permissions_for_backup_group():

check_permissions_for_backup_group()

group_manager.replace_workspace_groups_with_account_groups()
group_manager.replace_workspace_groups_with_account_groups(group_manager.migration_state)

@retried(on=[AssertionError], timeout=timedelta(minutes=1))
def check_permissions_after_replace():
Expand Down Expand Up @@ -230,3 +238,41 @@ def test_recover_from_ws_local_deletion(ws, make_ucx_group):
assert sorted([member.value for member in ws_group_two.members]) == sorted(
[member.value for member in recovered_state[ws_group_two.display_name].members]
)


def test_migration_state_should_be_saved_with_proper_values(sql_backend, make_schema):
inventory_database = make_schema()

workspace = Group(
display_name="workspace", entitlements=[ComplexValue(display="entitlements", value="allow-cluster-create")]
)
backup = Group(display_name="db-temp-workspace", meta=ResourceMeta("test"))
account = Group(display_name="account")

state = GroupMigrationState()
state.add(workspace, backup, account)

state.persist_migration_state(sql_backend, inventory_database.name)
new_state = state.fetch_migration_state(sql_backend, inventory_database.name)

assert new_state.groups == state.groups


def test_migration_state_should_be_saved_without_missing_anything(sql_backend, make_schema):
inventory_database = make_schema()

workspace = Group("workspace")
backup = Group("db-temp-workspace")
account = Group("workspace")

state = GroupMigrationState()
state.add(workspace, backup, account)
state.add(workspace, backup, None)
state.add(workspace, None, account)
state.add(None, None, account)
state.add(None, None, None)

state.persist_migration_state(sql_backend, inventory_database.name)
new_state = state.fetch_migration_state(sql_backend, inventory_database.name)

assert len(new_state.groups) == len(state.groups)
62 changes: 62 additions & 0 deletions tests/integration/workspace_access/test_scim.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,62 @@
from databricks.sdk import WorkspaceClient
from databricks.sdk.service import iam

from databricks.labs.ucx.config import GroupsConfig
from databricks.labs.ucx.workspace_access.groups import (
GroupManager,
GroupMigrationState,
)
from databricks.labs.ucx.workspace_access.manager import PermissionManager
from databricks.labs.ucx.workspace_access.scim import ScimSupport


def test_scim(ws: WorkspaceClient, make_ucx_group, sql_backend, inventory_schema):
"""
This test does the following:
* create a ws group with roles and entitlements
* migrate this group
* verify that the migrated group has the same roles and entitlements
:return:
"""
ws_group, acc_group = make_ucx_group()
ws.groups.patch(
ws_group.id,
operations=[
iam.Patch(
op=iam.PatchOp.ADD,
path="entitlements",
value=[iam.ComplexValue(value="databricks-sql-access").as_dict()],
)
],
schemas=[iam.PatchSchema.URN_IETF_PARAMS_SCIM_API_MESSAGES_2_0_PATCH_OP],
)
groups_config = GroupsConfig(selected=[ws_group.display_name])

# Task 1 - crawl_permissions
scim_support = ScimSupport(ws)
pi = PermissionManager(sql_backend, inventory_schema, [scim_support])
pi.cleanup()
pi.inventorize_permissions()

# Task 2 - apply_permissions_to_backup_groups
group_manager = GroupManager(ws, groups_config)
group_manager.prepare_groups_in_environment()
pi.apply_group_permissions(group_manager.migration_state, destination="backup")
group_manager.migration_state.persist_migration_state(sql_backend, inventory_schema)

# Task 3 - replace the groups in the workspace with the account groups
group_manager = GroupManager(ws, groups_config)
remote_state = GroupMigrationState().fetch_migration_state(sql_backend, inventory_schema)
group_manager.replace_workspace_groups_with_account_groups(remote_state)

# Task 4 - apply_permissions_to_account_groups
remote_state = group_manager.migration_state.fetch_migration_state(sql_backend, inventory_schema)
migration_state = GroupManager.prepare_apply_permissions_to_account_groups(
ws, remote_state, groups_config.backup_group_prefix
)
pi.apply_group_permissions(migration_state, destination="account")
assert [
iam.ComplexValue(display=None, primary=None, type=None, value="databricks-sql-access"),
iam.ComplexValue(display=None, primary=None, type=None, value="allow-cluster-create"),
] == ws.groups.get(acc_group.id).entitlements
assert len(ws.groups.get(acc_group.id).members) == len(ws_group.members)
Original file line number Diff line number Diff line change
Expand Up @@ -197,7 +197,7 @@ def test_workspace_access_e2e(

toolkit.verify_permissions_on_backup_groups(to_verify)

toolkit.replace_workspace_groups_with_account_groups()
toolkit.replace_workspace_groups_with_account_groups(group_migration_state)

workspace_acc_membership = toolkit._group_manager.get_workspace_membership("Group")
assert acc_group.display_name in workspace_acc_membership
Expand Down
Loading