Skip to content

Commit 2cd9f26

Browse files
Merge pull request #75585 from ClickHouse/cherrypick/24.12/75319
Cherry pick #75319 to 24.12: mask AzureBlobStorage table engine credentials
2 parents 338f74c + 13bbe92 commit 2cd9f26

File tree

3 files changed

+131
-9
lines changed

3 files changed

+131
-9
lines changed

src/Parsers/FunctionSecretArgumentsFinder.h

+46-3
Original file line numberDiff line numberDiff line change
@@ -269,16 +269,16 @@ class FunctionSecretArgumentsFinder
269269
return;
270270

271271
/// We should check other arguments first because we don't need to do any replacement in case of
272-
/// azureBlobStorage(connection_string|storage_account_url, container_name, blobpath, format, [account_name, account_key, ...])
273-
/// azureBlobStorageCluster(cluster, connection_string|storage_account_url, container_name, blobpath, format, [account_name, account_key, ...])
272+
/// azureBlobStorage(connection_string|storage_account_url, container_name, blobpath, format) -- in this case there is no account_key argument
273+
/// azureBlobStorageCluster(cluster, connection_string|storage_account_url, container_name, blobpath, format) -- in this case there is no account_key argument
274274
size_t count = function->arguments->size();
275275
if ((url_arg_idx + 4 <= count) && (count <= url_arg_idx + 7))
276276
{
277277
String fourth_arg;
278278
if (tryGetStringFromArgument(url_arg_idx + 3, &fourth_arg))
279279
{
280280
if (fourth_arg == "auto" || KnownFormatNames::instance().exists(fourth_arg))
281-
return; /// The argument after 'url' is a format: s3('url', 'format', ...)
281+
return;
282282
}
283283
}
284284

@@ -485,6 +485,10 @@ class FunctionSecretArgumentsFinder
485485
{
486486
findURLSecretArguments();
487487
}
488+
else if (engine_name == "AzureBlobStorage")
489+
{
490+
findAzureBlobStorageTableEngineSecretArguments();
491+
}
488492
}
489493

490494
void findExternalDistributedTableEngineSecretArguments()
@@ -538,6 +542,41 @@ class FunctionSecretArgumentsFinder
538542
markSecretArgument(2);
539543
}
540544

545+
void findAzureBlobStorageTableEngineSecretArguments()
546+
{
547+
/// AzureBlobStorage(connection_string|storage_account_url, container_name, blobpath, format, [account_name, account_key, ...])
548+
size_t url_arg_idx = 0;
549+
550+
if (isNamedCollectionName(url_arg_idx))
551+
{
552+
/// AzureBlobStorage(named_collection, ..., account_key = 'account_key', ...)
553+
if (maskAzureConnectionString(-1, true, 1))
554+
return;
555+
findSecretNamedArgument("account_key", 1);
556+
return;
557+
}
558+
559+
if (maskAzureConnectionString(url_arg_idx))
560+
return;
561+
562+
/// We should check other arguments first because we don't need to do any replacement in case of
563+
/// AzureBlobStorage(connection_string|storage_account_url, container_name, blobpath, format) -- in this case there is no account_key argument
564+
size_t count = function->arguments->size();
565+
if ((url_arg_idx + 4 <= count) && (count <= url_arg_idx + 7))
566+
{
567+
String fourth_arg;
568+
if (tryGetStringFromArgument(url_arg_idx + 3, &fourth_arg))
569+
{
570+
if (fourth_arg == "auto" || KnownFormatNames::instance().exists(fourth_arg))
571+
return;
572+
}
573+
}
574+
575+
/// We're going to replace 'account_key' with '[HIDDEN]' if account_key is used in the signature
576+
if (url_arg_idx + 4 < count)
577+
markSecretArgument(url_arg_idx + 4);
578+
}
579+
541580
void findDatabaseEngineSecretArguments()
542581
{
543582
const String & engine_name = function->name();
@@ -592,6 +631,10 @@ class FunctionSecretArgumentsFinder
592631
/// BACKUP ... TO S3(url, [aws_access_key_id, aws_secret_access_key])
593632
markSecretArgument(2);
594633
}
634+
else if (engine_name == "AzureBlobStorage")
635+
{
636+
findAzureBlobStorageTableEngineSecretArguments();
637+
}
595638
}
596639

597640
/// Whether a specified argument can be the name of a named collection?

tests/integration/test_mask_sensitive_info/test.py

+74
Original file line numberDiff line numberDiff line change
@@ -178,8 +178,67 @@ def make_test_case(endpoint_specs):
178178
assert password not in name
179179

180180

181+
def test_backup_table_AzureBlobStorage():
182+
azure_conn_string = cluster.env_variables["AZURITE_CONNECTION_STRING"]
183+
azure_storage_account_url = cluster.env_variables["AZURITE_STORAGE_ACCOUNT_URL"]
184+
azure_account_name = "devstoreaccount1"
185+
azure_account_key = "Eby8vdM02xNOcqFlqUwJPLlmEtlCDXJ1OUzFT50uSRZ6IFsuFq2UVErCz4I6tq/K1SZFPTOtr/KBHBeksoGMGw=="
186+
187+
setup_queries = [
188+
"CREATE TABLE backup_test (x int) ENGINE = MergeTree ORDER BY x",
189+
"INSERT INTO backup_test SELECT * FROM numbers(10)",
190+
]
191+
192+
endpoints_with_credentials = [
193+
(
194+
f"AzureBlobStorage('{azure_conn_string}', 'cont', 'backup_test_base')",
195+
f"AzureBlobStorage('{azure_conn_string}', 'cont', 'backup_test_incremental')",
196+
),
197+
(
198+
f"AzureBlobStorage('{azure_storage_account_url}', 'cont', 'second_backup_test_base', '{azure_account_name}', '{azure_account_key}')",
199+
f"AzureBlobStorage('{azure_storage_account_url}', 'cont', 'second_backup_test_incremental', '{azure_account_name}', '{azure_account_key}')",
200+
)
201+
]
202+
203+
for query in setup_queries:
204+
node.query_and_get_answer_with_error(query)
205+
206+
# Actually need to make two backups to have base_backup
207+
def make_test_case(endpoint_specs):
208+
# Run ASYNC so it returns the backup id
209+
return (
210+
f"BACKUP TABLE backup_test TO {endpoint_specs[0]} ASYNC",
211+
f"BACKUP TABLE backup_test TO {endpoint_specs[1]} SETTINGS async=1, base_backup={endpoint_specs[0]}",
212+
)
213+
214+
test_cases = [
215+
make_test_case(endpoint_spec) for endpoint_spec in endpoints_with_credentials
216+
]
217+
for base_query, inc_query in test_cases:
218+
node.query_and_get_answer_with_error(base_query)[0]
219+
220+
inc_backup_query_output = node.query_and_get_answer_with_error(inc_query)[0]
221+
inc_backup_id = TSV.toMat(inc_backup_query_output)[0][0]
222+
names_in_system_backups_output, _ = node.query_and_get_answer_with_error(
223+
f"SELECT base_backup_name, name FROM system.backups where id = '{inc_backup_id}'"
224+
)
225+
226+
base_backup_name, name = TSV.toMat(names_in_system_backups_output)[0]
227+
228+
assert azure_account_key not in base_backup_name
229+
assert azure_account_key not in name
230+
231+
181232
def test_create_table():
182233
password = new_password()
234+
azure_conn_string = cluster.env_variables["AZURITE_CONNECTION_STRING"]
235+
account_key_pattern = re.compile("AccountKey=.*?(;|$)")
236+
masked_azure_conn_string = re.sub(
237+
account_key_pattern, "AccountKey=[HIDDEN]\\1", azure_conn_string
238+
)
239+
azure_storage_account_url = cluster.env_variables["AZURITE_STORAGE_ACCOUNT_URL"]
240+
azure_account_name = "devstoreaccount1"
241+
azure_account_key = "Eby8vdM02xNOcqFlqUwJPLlmEtlCDXJ1OUzFT50uSRZ6IFsuFq2UVErCz4I6tq/K1SZFPTOtr/KBHBeksoGMGw=="
183242

184243
table_engines = [
185244
f"MySQL('mysql80:3306', 'mysql_db', 'mysql_table', 'mysql_user', '{password}')",
@@ -210,6 +269,15 @@ def test_create_table():
210269
f"Iceberg('http://minio1:9001/root/data/test11.csv.gz', 'minio', '{password}')",
211270
"DNS_ERROR",
212271
),
272+
(
273+
f"IcebergS3('http://minio1:9001/root/data/test11.csv.gz', 'minio', '{password}')",
274+
"DNS_ERROR",
275+
),
276+
f"AzureBlobStorage('{azure_conn_string}', 'cont', 'test_simple.csv', 'CSV')",
277+
f"AzureBlobStorage('{azure_conn_string}', 'cont', 'test_simple_1.csv', 'CSV', 'none')",
278+
f"AzureBlobStorage('{azure_storage_account_url}', 'cont', 'test_simple_2.csv', '{azure_account_name}', '{azure_account_key}')",
279+
f"AzureBlobStorage('{azure_storage_account_url}', 'cont', 'test_simple_3.csv', '{azure_account_name}', '{azure_account_key}', 'CSV')",
280+
f"AzureBlobStorage('{azure_storage_account_url}', 'cont', 'test_simple_4.csv', '{azure_account_name}', '{azure_account_key}', 'CSV', 'none')",
213281
]
214282

215283
def make_test_case(i):
@@ -275,6 +343,12 @@ def make_test_case(i):
275343
"CREATE TABLE table19 (`x` int) ENGINE = S3Queue('http://minio1:9001/root/data/', 'minio', '[HIDDEN]', 'CSV') SETTINGS mode = 'ordered'",
276344
"CREATE TABLE table20 (`x` int) ENGINE = S3Queue('http://minio1:9001/root/data/', 'minio', '[HIDDEN]', 'CSV', 'gzip') SETTINGS mode = 'ordered'",
277345
"CREATE TABLE table21 (`x` int) ENGINE = Iceberg('http://minio1:9001/root/data/test11.csv.gz', 'minio', '[HIDDEN]')",
346+
"CREATE TABLE table22 (`x` int) ENGINE = IcebergS3('http://minio1:9001/root/data/test11.csv.gz', 'minio', '[HIDDEN]')",
347+
f"CREATE TABLE table23 (`x` int) ENGINE = AzureBlobStorage('{masked_azure_conn_string}', 'cont', 'test_simple.csv', 'CSV')",
348+
f"CREATE TABLE table24 (`x` int) ENGINE = AzureBlobStorage('{masked_azure_conn_string}', 'cont', 'test_simple_1.csv', 'CSV', 'none')",
349+
f"CREATE TABLE table25 (`x` int) ENGINE = AzureBlobStorage('{azure_storage_account_url}', 'cont', 'test_simple_2.csv', '{azure_account_name}', '[HIDDEN]')",
350+
f"CREATE TABLE table26 (`x` int) ENGINE = AzureBlobStorage('{azure_storage_account_url}', 'cont', 'test_simple_3.csv', '{azure_account_name}', '[HIDDEN]', 'CSV')",
351+
f"CREATE TABLE table27 (`x` int) ENGINE = AzureBlobStorage('{azure_storage_account_url}', 'cont', 'test_simple_4.csv', '{azure_account_name}', '[HIDDEN]', 'CSV', 'none')",
278352
],
279353
must_not_contain=[password],
280354
)

tests/integration/test_storage_azure_blob_storage/test.py

+11-6
Original file line numberDiff line numberDiff line change
@@ -1318,6 +1318,11 @@ def test_format_detection(cluster):
13181318
storage_account_url = cluster.env_variables["AZURITE_STORAGE_ACCOUNT_URL"]
13191319
account_name = "devstoreaccount1"
13201320
account_key = "Eby8vdM02xNOcqFlqUwJPLlmEtlCDXJ1OUzFT50uSRZ6IFsuFq2UVErCz4I6tq/K1SZFPTOtr/KBHBeksoGMGw=="
1321+
account_key_pattern = re.compile("AccountKey=.*?(;|$)")
1322+
masked_azure_conn_string = re.sub(
1323+
account_key_pattern, "AccountKey=[HIDDEN]\\1", connection_string
1324+
)
1325+
13211326
azure_query(
13221327
node,
13231328
f"INSERT INTO TABLE FUNCTION azureBlobStorage('{storage_account_url}', 'cont', 'test_format_detection0', '{account_name}', '{account_key}', 'JSONEachRow', 'auto', 'x UInt64, y String') select number as x, 'str_' || toString(number) from numbers(0) SETTINGS azure_truncate_on_insert=1",
@@ -1392,7 +1397,7 @@ def test_format_detection(cluster):
13921397
)
13931398
assert (
13941399
result
1395-
== f"CREATE TABLE default.test_format_detection\\n(\\n `x` Nullable(String),\\n `y` Nullable(String)\\n)\\nENGINE = AzureBlobStorage(\\'{connection_string}\\', \\'cont\\', \\'test_format_detection1\\', \\'JSON\\')\n"
1400+
== f"CREATE TABLE default.test_format_detection\\n(\\n `x` Nullable(String),\\n `y` Nullable(String)\\n)\\nENGINE = AzureBlobStorage(\\'{masked_azure_conn_string}\\', \\'cont\\', \\'test_format_detection1\\', \\'JSON\\')\n"
13961401
)
13971402

13981403
azure_query(
@@ -1405,7 +1410,7 @@ def test_format_detection(cluster):
14051410
)
14061411
assert (
14071412
result
1408-
== f"CREATE TABLE default.test_format_detection\\n(\\n `x` Nullable(String),\\n `y` Nullable(String)\\n)\\nENGINE = AzureBlobStorage(\\'{connection_string}\\', \\'cont\\', \\'test_format_detection1\\', \\'JSON\\')\n"
1413+
== f"CREATE TABLE default.test_format_detection\\n(\\n `x` Nullable(String),\\n `y` Nullable(String)\\n)\\nENGINE = AzureBlobStorage(\\'{masked_azure_conn_string}\\', \\'cont\\', \\'test_format_detection1\\', \\'JSON\\')\n"
14091414
)
14101415

14111416
azure_query(
@@ -1418,7 +1423,7 @@ def test_format_detection(cluster):
14181423
)
14191424
assert (
14201425
result
1421-
== f"CREATE TABLE default.test_format_detection\\n(\\n `x` Nullable(String),\\n `y` Nullable(String)\\n)\\nENGINE = AzureBlobStorage(\\'{connection_string}\\', \\'cont\\', \\'test_format_detection1\\', \\'JSON\\', \\'none\\')\n"
1426+
== f"CREATE TABLE default.test_format_detection\\n(\\n `x` Nullable(String),\\n `y` Nullable(String)\\n)\\nENGINE = AzureBlobStorage(\\'{masked_azure_conn_string}\\', \\'cont\\', \\'test_format_detection1\\', \\'JSON\\', \\'none\\')\n"
14221427
)
14231428

14241429
azure_query(
@@ -1431,7 +1436,7 @@ def test_format_detection(cluster):
14311436
)
14321437
assert (
14331438
result
1434-
== f"CREATE TABLE default.test_format_detection\\n(\\n `x` Nullable(String),\\n `y` Nullable(String)\\n)\\nENGINE = AzureBlobStorage(\\'{storage_account_url}\\', \\'cont\\', \\'test_format_detection1\\', \\'{account_name}\\', \\'{account_key}\\', \\'JSON\\')\n"
1439+
== f"CREATE TABLE default.test_format_detection\\n(\\n `x` Nullable(String),\\n `y` Nullable(String)\\n)\\nENGINE = AzureBlobStorage(\\'{storage_account_url}\\', \\'cont\\', \\'test_format_detection1\\', \\'{account_name}\\', \\'[HIDDEN]\\', \\'JSON\\')\n"
14351440
)
14361441

14371442
azure_query(
@@ -1444,7 +1449,7 @@ def test_format_detection(cluster):
14441449
)
14451450
assert (
14461451
result
1447-
== f"CREATE TABLE default.test_format_detection\\n(\\n `x` Nullable(String),\\n `y` Nullable(String)\\n)\\nENGINE = AzureBlobStorage(\\'{storage_account_url}\\', \\'cont\\', \\'test_format_detection1\\', \\'{account_name}\\', \\'{account_key}\\', \\'JSON\\')\n"
1452+
== f"CREATE TABLE default.test_format_detection\\n(\\n `x` Nullable(String),\\n `y` Nullable(String)\\n)\\nENGINE = AzureBlobStorage(\\'{storage_account_url}\\', \\'cont\\', \\'test_format_detection1\\', \\'{account_name}\\', \\'[HIDDEN]\\', \\'JSON\\')\n"
14481453
)
14491454

14501455
azure_query(
@@ -1457,7 +1462,7 @@ def test_format_detection(cluster):
14571462
)
14581463
assert (
14591464
result
1460-
== f"CREATE TABLE default.test_format_detection\\n(\\n `x` Nullable(String),\\n `y` Nullable(String)\\n)\\nENGINE = AzureBlobStorage(\\'{storage_account_url}\\', \\'cont\\', \\'test_format_detection1\\', \\'{account_name}\\', \\'{account_key}\\', \\'JSON\\', \\'none\\')\n"
1465+
== f"CREATE TABLE default.test_format_detection\\n(\\n `x` Nullable(String),\\n `y` Nullable(String)\\n)\\nENGINE = AzureBlobStorage(\\'{storage_account_url}\\', \\'cont\\', \\'test_format_detection1\\', \\'{account_name}\\', \\'[HIDDEN]\\', \\'JSON\\', \\'none\\')\n"
14611466
)
14621467

14631468

0 commit comments

Comments
 (0)