Skip to content

Commit

Permalink
add code to handle the InvalidParameterValue exception when a existin…
Browse files Browse the repository at this point in the history
…g UC external location is sub path of the prefix we are going to create an external location with.
  • Loading branch information
qziyuan committed Mar 6, 2024
1 parent 863ba60 commit 3884053
Show file tree
Hide file tree
Showing 3 changed files with 144 additions and 11 deletions.
29 changes: 22 additions & 7 deletions src/databricks/labs/ucx/azure/locations.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,7 +3,7 @@

from databricks.labs.blueprint.installation import Installation
from databricks.sdk import WorkspaceClient
from databricks.sdk.errors.platform import PermissionDenied
from databricks.sdk.errors.platform import InvalidParameterValue, PermissionDenied

from databricks.labs.ucx.azure.access import AzureResourcePermissions
from databricks.labs.ucx.azure.resources import AzureResources
Expand Down Expand Up @@ -98,6 +98,20 @@ def _create_location_name(self, location_url: str) -> str:
res_name = after_at.replace(".dfs.core.windows.net", "").rstrip("/").replace("/", "_")
return f"{container_name}_{res_name}"

def _create_external_location_helper(
self, name, url, credential, comment="Created by UCX", read_only=False, skip_validation=False
) -> str | None:
try:
self._ws.external_locations.create(
name, url, credential, comment=comment, read_only=read_only, skip_validation=skip_validation
)
return url
except InvalidParameterValue as invalid:
if "overlaps with an existing external location" in str(invalid):
logger.warning(f"Skip creating external location, see details: {str(invalid)}")
return None
raise invalid

def _create_external_location(
self, location_url: str, prefix_mapping_write: dict[str, str], prefix_mapping_read: dict[str, str]
) -> str | None:
Expand All @@ -109,35 +123,35 @@ def _create_external_location(

# try to create external location with write privilege first
if container_url in prefix_mapping_write:
self._ws.external_locations.create(
url = self._create_external_location_helper(
location_name, location_url, prefix_mapping_write[container_url], comment="Created by UCX"
)
return location_url
return url
# if no matched write privilege credential, try to create read-only external location
if container_url in prefix_mapping_read:
try:
self._ws.external_locations.create(
url = self._create_external_location_helper(
location_name,
location_url,
prefix_mapping_read[container_url],
comment="Created by UCX",
read_only=True,
)
return url
except PermissionDenied as denied:
if "No file available under the location to read" in str(denied):
# Empty location will cause failed READ permission check with read-only credential
# Skip skip_validation in this case
self._ws.external_locations.create(
url = self._create_external_location_helper(
location_name,
location_url,
prefix_mapping_read[container_url],
comment="Created by UCX",
read_only=True,
skip_validation=True,
)
return location_url
return url
raise denied
return location_url
# if no credential found
return None

Expand Down Expand Up @@ -166,6 +180,7 @@ def run(self):
"2. If you use service principal in extra_config when create dbfs mount or use service principal "
"in your code directly for storage access, UCX cannot automatically migrate them to storage credential."
"Please manually create those storage credentials first."
"3. You may have overlapping external location already in UC."
)
for loc_url in leftover_loc_urls:
logger.info(f"Not created external location: {loc_url}")
Expand Down
40 changes: 40 additions & 0 deletions tests/integration/azure/test_locations.py
Original file line number Diff line number Diff line change
Expand Up @@ -129,3 +129,43 @@ def test_missing_credential(caplog, ws, sql_backend, inventory_schema):

assert "External locations below are not created in UC" in caplog.text
assert len(leftover_loc) == 2


@pytest.mark.skip
def test_overlapping_location(caplog, ws, sql_backend, inventory_schema):
"""Customer may already create external location with url that is a sub path of the table prefix hive_metastore/locations.py extracted.
This test case is to verify the overlapping location will be detected and reported.
"""
# create an external location first so the overlapping conflict will be triggered latter
ws.external_locations.create(
"uctest_ziyuanqintest_overlap", "abfss://uctest@ziyuanqintest.dfs.core.windows.net/a", "oneenv-adls"
)

locations = [ExternalLocation("abfss://uctest@ziyuanqintest.dfs.core.windows.net/", 1)]
sql_backend.save_table(f"{inventory_schema}.external_locations", locations, ExternalLocation)
location_crawler = ExternalLocations(ws, sql_backend, inventory_schema)

installation = MockInstallation(
{
"azure_storage_account_info.csv": [
{
'prefix': 'abfss://uctest@ziyuanqintest.dfs.core.windows.net/',
'client_id': "redacted-for-github-929e765443eb",
'principal': "oneenv-adls",
'privilege': "WRITE_FILES",
'type': "Application",
}
]
}
)
azurerm = AzureResources(ws)

location_migration = ExternalLocationsMigration(
ws, location_crawler, AzureResourcePermissions(installation, ws, azurerm, location_crawler), azurerm
)
try:
leftover_loc_urls = location_migration.run()
assert "abfss://uctest@ziyuanqintest.dfs.core.windows.net/" in leftover_loc_urls
assert "overlaps with an existing external location" in caplog.text
finally:
save_delete_location(ws, "uctest_ziyuanqintest_overlap")
86 changes: 82 additions & 4 deletions tests/unit/azure/test_locations.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,7 +4,7 @@
import pytest
from databricks.labs.blueprint.installation import MockInstallation
from databricks.sdk import WorkspaceClient
from databricks.sdk.errors.platform import PermissionDenied
from databricks.sdk.errors.platform import InvalidParameterValue, PermissionDenied
from databricks.sdk.service.catalog import (
AzureManagedIdentity,
AzureServicePrincipal,
Expand Down Expand Up @@ -96,13 +96,16 @@ def test_run_service_principal(ws):
"abfss://container1@test.dfs.core.windows.net/one/",
"credential_sp1",
comment="Created by UCX",
read_only=False,
skip_validation=False,
)
ws.external_locations.create.assert_any_call(
"container2_test",
"abfss://container2@test.dfs.core.windows.net/",
"credential_sp2",
comment="Created by UCX",
read_only=True,
skip_validation=False,
)


Expand Down Expand Up @@ -178,13 +181,16 @@ def test_run_managed_identity(ws, mocker):
"abfss://container4@test.dfs.core.windows.net/",
"credential_system_assigned_mi",
comment="Created by UCX",
read_only=False,
skip_validation=False,
)
ws.external_locations.create.assert_any_call(
"container5_test_a_b",
"abfss://container5@test.dfs.core.windows.net/a/b/",
"credential_user_assigned_mi",
comment="Created by UCX",
read_only=True,
skip_validation=False,
)


Expand All @@ -193,8 +199,12 @@ def create_side_effect(location_name, *args, **kwargs): # pylint: disable=unuse
if not kwargs.get('skip_validation'):
if "empty" in location_name:
raise PermissionDenied("No file available under the location to read")
if "exception" in location_name:
if "other_permission_denied" in location_name:
raise PermissionDenied("Other PermissionDenied exception")
if "overlap_location" in location_name:
raise InvalidParameterValue("overlaps with an existing external location")
if "other_invalid_parameter" in location_name:
raise InvalidParameterValue("Other InvalidParameterValue exception")


def test_location_failed_to_read(ws):
Expand All @@ -206,7 +216,7 @@ def test_location_failed_to_read(ws):
rows={
r"SELECT \* FROM location_test.external_locations": [
row_factory(["abfss://empty@test.dfs.core.windows.net/", 1]),
row_factory(["abfss://exception@test.dfs.core.windows.net/", 2]),
row_factory(["abfss://other_permission_denied@test.dfs.core.windows.net/", 2]),
]
}
)
Expand Down Expand Up @@ -234,7 +244,7 @@ def test_location_failed_to_read(ws):
'directory_id': 'directory_id_1',
},
{
'prefix': 'abfss://exception@test.dfs.core.windows.net/',
'prefix': 'abfss://other_permission_denied@test.dfs.core.windows.net/',
'client_id': 'application_id_2',
'principal': 'credential_sp2',
'privilege': 'READ_FILES',
Expand Down Expand Up @@ -269,6 +279,74 @@ def test_location_failed_to_read(ws):
)


def test_overlapping_locations(ws, caplog):
caplog.set_level(logging.INFO)

# mock crawled HMS external locations
row_factory = type("Row", (Row,), {"__columns__": ["location", "table_count"]})
mock_backend = MockBackend(
rows={
r"SELECT \* FROM location_test.external_locations": [
row_factory(["abfss://overlap_location@test.dfs.core.windows.net/a/", 1]),
row_factory(["abfss://other_invalid_parameter@test.dfs.core.windows.net/a/", 1]),
]
}
)

# mock listing storage credentials
ws.storage_credentials.list.return_value = [
StorageCredentialInfo(
name="credential_sp1",
azure_service_principal=AzureServicePrincipal(
"directory_id_1",
"application_id_1",
"test_secret",
),
)
]

# mock listing UC external locations, a location that is sub path of prefix is already created
ws.external_locations.list.return_value = [ExternalLocationInfo(name="none", url="none")]

# mock installation with permission mapping
mock_installation = MockInstallation(
{
"azure_storage_account_info.csv": [
{
'prefix': 'abfss://overlap_location@test.dfs.core.windows.net/',
'client_id': 'application_id_1',
'principal': 'credential_sp1',
'privilege': 'WRITE_FILES',
'type': 'Application',
'directory_id': 'directory_id_1',
},
{
'prefix': 'abfss://other_invalid_parameter@test.dfs.core.windows.net/',
'client_id': 'application_id_1',
'principal': 'credential_sp1',
'privilege': 'WRITE_FILES',
'type': 'Application',
'directory_id': 'directory_id_1',
},
],
}
)

ws.external_locations.create.side_effect = create_side_effect

location_crawler = ExternalLocations(ws, mock_backend, "location_test")
azurerm = AzureResources(ws)
location_migration = ExternalLocationsMigration(
ws, location_crawler, AzureResourcePermissions(mock_installation, ws, azurerm, location_crawler), azurerm
)

# assert InvalidParameterValue got re-threw if it's not caused by overlapping location
with pytest.raises(InvalidParameterValue):
location_migration.run()
# assert the InvalidParameterValue due to overlapping location is handled.
assert "overlaps with an existing external location" in caplog.text


def test_corner_cases_with_missing_fields(ws, caplog, mocker):
"""test corner cases with: missing credential name, missing application_id"""
caplog.set_level(logging.INFO)
Expand Down

0 comments on commit 3884053

Please sign in to comment.