Skip to content

Commit

Permalink
fixup! Fix correctness issue of double slash in Hive connector
Browse files Browse the repository at this point in the history
  • Loading branch information
ebyhr committed Jun 16, 2023
1 parent f282fa2 commit 488e5c1
Show file tree
Hide file tree
Showing 2 changed files with 46 additions and 6 deletions.
Original file line number Diff line number Diff line change
Expand Up @@ -27,6 +27,7 @@

import java.util.Optional;

import static io.trino.filesystem.hdfs.HadoopPaths.hadoopPath;
import static io.trino.plugin.hive.HiveErrorCode.HIVE_PATH_ALREADY_EXISTS;
import static io.trino.plugin.hive.LocationHandle.WriteMode.DIRECT_TO_TARGET_EXISTING_DIRECTORY;
import static io.trino.plugin.hive.LocationHandle.WriteMode.DIRECT_TO_TARGET_NEW_DIRECTORY;
Expand Down Expand Up @@ -63,7 +64,7 @@ public Location forNewTable(SemiTransactionalHiveMetastore metastore, ConnectorS
Location targetPath = getTableDefaultLocation(context, metastore, hdfsEnvironment, schemaName, tableName);

// verify the target directory for table
if (pathExists(context, hdfsEnvironment, new Path(targetPath.toString()))) {
if (pathExists(context, hdfsEnvironment, hadoopPath(targetPath))) {
throw new TrinoException(HIVE_PATH_ALREADY_EXISTS, format("Target directory for table '%s.%s' already exists: %s", schemaName, tableName, targetPath));
}
return targetPath;
Expand All @@ -76,13 +77,13 @@ public LocationHandle forNewTableAsSelect(SemiTransactionalHiveMetastore metasto
Location targetPath = externalLocation.orElseGet(() -> getTableDefaultLocation(context, metastore, hdfsEnvironment, schemaName, tableName));

// verify the target directory for the table
if (pathExists(context, hdfsEnvironment, new Path(targetPath.toString()))) {
if (pathExists(context, hdfsEnvironment, hadoopPath(targetPath))) {
throw new TrinoException(HIVE_PATH_ALREADY_EXISTS, format("Target directory for table '%s.%s' already exists: %s", schemaName, tableName, targetPath));
}

// TODO detect when existing table's location is a on a different file system than the temporary directory
if (shouldUseTemporaryDirectory(context, new Path(targetPath.toString()), externalLocation.isPresent())) {
Location writePath = createTemporaryPath(context, hdfsEnvironment, new Path(targetPath.toString()), temporaryStagingDirectoryPath);
if (shouldUseTemporaryDirectory(context, hadoopPath(targetPath), externalLocation.isPresent())) {
Location writePath = createTemporaryPath(context, hdfsEnvironment, hadoopPath(targetPath), temporaryStagingDirectoryPath);
return new LocationHandle(targetPath, writePath, STAGE_AND_MOVE_TO_TARGET_DIRECTORY);
}
return new LocationHandle(targetPath, targetPath, DIRECT_TO_TARGET_NEW_DIRECTORY);
Expand All @@ -94,8 +95,8 @@ public LocationHandle forExistingTable(SemiTransactionalHiveMetastore metastore,
HdfsContext context = new HdfsContext(session);
Location targetPath = Location.of(table.getStorage().getLocation());

if (shouldUseTemporaryDirectory(context, new Path(targetPath.toString()), false) && !isTransactionalTable(table.getParameters())) {
Location writePath = createTemporaryPath(context, hdfsEnvironment, new Path(targetPath.toString()), temporaryStagingDirectoryPath);
if (shouldUseTemporaryDirectory(context, hadoopPath(targetPath), false) && !isTransactionalTable(table.getParameters())) {
Location writePath = createTemporaryPath(context, hdfsEnvironment, hadoopPath(targetPath), temporaryStagingDirectoryPath);
return new LocationHandle(targetPath, writePath, STAGE_AND_MOVE_TO_TARGET_DIRECTORY);
}
return new LocationHandle(targetPath, targetPath, DIRECT_TO_TARGET_EXISTING_DIRECTORY);
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -260,6 +260,45 @@ public void testCtasWithIncorrectLocation()
.hasStackTraceContaining("Fragment is not allowed in a file system location");
}

@Test
public void testCreateTableWithDoubleSlash()
{
String schemaName = "test_create_table_with_double_slash_" + randomNameSuffix();
String schemaLocation = "s3://%s/%s/double_slash//test_schema".formatted(bucketName, schemaName);
String tableName = "test_create_table_with_double_slash_" + randomNameSuffix();
String schemaTableName = schemaName + "." + tableName;

// Previously, HiveLocationService replaced double slash with single slash
assertUpdate("CREATE SCHEMA " + schemaName + " WITH (location = '" + schemaLocation + "')");
String existingKey = "%s/double_slash/test_schema/%s".formatted(schemaName, tableName);
s3.putObject(bucketName, existingKey, "test content");

assertUpdate("CREATE TABLE " + schemaTableName + "(col_int int)");
assertUpdate("INSERT INTO " + schemaTableName + " VALUES 1", 1);
assertQuery("SELECT * FROM " + schemaTableName, "VALUES 1");
assertUpdate("DROP TABLE " + schemaTableName);
s3.deleteObject(bucketName, existingKey);
}

@Test
public void testCtasWithDoubleSlash()
{
String schemaName = "test_ctas_with_double_slash_" + randomNameSuffix();
String schemaLocation = "s3://%s/%s/double_slash//test_schema".formatted(bucketName, schemaName);
String tableName = "test_create_table_with_double_slash_" + randomNameSuffix();
String schemaTableName = schemaName + "." + tableName;

// Previously, HiveLocationService replaced double slash with single slash
assertUpdate("CREATE SCHEMA " + schemaName + " WITH (location = '" + schemaLocation + "')");
String existingKey = "%s/double_slash/test_schema/%s".formatted(schemaName, tableName);
s3.putObject(bucketName, existingKey, "test content");

assertUpdate("CREATE TABLE " + schemaTableName + " AS SELECT 1 AS col_int", 1);
assertQuery("SELECT * FROM " + schemaTableName, "VALUES 1");
assertUpdate("DROP TABLE " + schemaTableName);
s3.deleteObject(bucketName, existingKey);
}

@Test
public void testCreateSchemaWithIncorrectLocation()
{
Expand Down

0 comments on commit 488e5c1

Please sign in to comment.