Skip to content

Commit

Permalink
Fix TestHiveSkipEmptyFiles methods refactor
Browse files Browse the repository at this point in the history
  • Loading branch information
cvarelad-denodo authored and tdcmeehan committed Jun 13, 2024
1 parent 24d3b6c commit 6ed1e6f
Showing 1 changed file with 54 additions and 89 deletions.
Original file line number Diff line number Diff line change
Expand Up @@ -30,6 +30,7 @@
import org.testng.annotations.Test;

import java.io.File;
import java.io.FilenameFilter;
import java.io.IOException;
import java.nio.file.Files;
import java.nio.file.Path;
Expand Down Expand Up @@ -94,8 +95,8 @@ private void generateMetadataDirectory()
throws Exception
{
temporaryDirectory = createTempDir();
generateMetadata("skip_empty_files_success");
generateMetadata("skip_empty_files_fail");
generateMetadata(queryRunner, "skip_empty_files_success");
generateMetadata(queryFailRunner, "skip_empty_files_fail");
generateBucketedMetadataWithEmptyFiles(queryBucketRunner, "skip_empty_files_bucket_success", false);
generateBucketedMetadataWithEmptyFiles(queryBucketFailRunner, "skip_empty_files_bucket_insert_fail", false);
generateBucketedMetadataWithEmptyFiles(queryBucketFailRunner, "skip_empty_files_bucket_replace_fail", true);
Expand Down Expand Up @@ -188,10 +189,18 @@ private void createQueryBucketFailRunner()
*
* @param tableName a {@link String} containing the desired table name
*/
private void generateMetadata(String tableName)
private void generateMetadata(DistributedQueryRunner queryRunner, String tableName)
throws Exception
{
Path tempDirectory = Files.createDirectory(Paths.get(temporaryDirectory.getPath(), tableName));
@Language("SQL") String createQuery = format(
"CREATE TABLE %s.\"%s\".\"%s\" (field %s) WITH (external_location = '%s')",
CATALOG,
SCHEMA,
tableName,
IntegerType.INTEGER,
temporaryDirectory.toURI() + tableName);
queryRunner.execute(createQuery);
Path firstParquetFile = createTempFile(tempDirectory, randomUUID().toString(), randomUUID().toString());
ParquetTester.writeParquetFileFromPresto(firstParquetFile.toFile(),
ImmutableList.of(IntegerType.INTEGER),
Expand Down Expand Up @@ -228,133 +237,89 @@ private void generateBucketedMetadataWithEmptyFiles(DistributedQueryRunner query
queryRunner.execute(insertQuery);
}
if (replaceDataFileByEmptyFile) {
Files.delete(Arrays.stream(requireNonNull(partitionDirectory.toFile().listFiles((dir, name) -> !name.endsWith(".crc")))).iterator().next().toPath());
FilenameFilter filenameFilter = (dir, name) -> !name.endsWith(".crc");
File[] filteredFiles = partitionDirectory.toFile().listFiles(filenameFilter);
Files.delete(Arrays.stream(requireNonNull(filteredFiles)).iterator().next().toPath());
}
createTempFile(partitionDirectory, randomUUID().toString(), randomUUID().toString());
}

/**
* Tries a table with the configuration property desired. If succeeds, tests the output.
* Finally, it drops the table.
*/
@Test
public void testSkipEmptyFilesSuccessful()
{
String tableName = "skip_empty_files_success";
executeCreationTestAndDropCycle(queryRunner, tableName, temporaryDirectory.toURI() + tableName);
try {
@Language("SQL") String selectQuery = format("SELECT \"$path\" FROM %s.\"%s\".\"%s\"", CATALOG,
SCHEMA, "skip_empty_files_success");
MaterializedResult result = queryRunner.execute(selectQuery);
assertEquals(1, result.getRowCount());
}
finally {
dropTable(queryRunner, "skip_empty_files_success");
}
}

@Test
public void testSkipEmptyFilesError()
{
String tableName = "skip_empty_files_fail";
executeCreationTestAndDropCycleFail(queryFailRunner, tableName, temporaryDirectory.toURI() + tableName);
try {
@Language("SQL") String selectQuery = format("SELECT * FROM %s.\"%s\".\"%s\"", CATALOG,
SCHEMA, "skip_empty_files_fail");
assertQueryFails(queryFailRunner, selectQuery, ".* is not a valid Parquet File");
}
finally {
dropTable(queryFailRunner, "skip_empty_files_fail");
}
}

@Test
public void testSkipEmptyFilesBucketSuccessful()
{
String tableName = "skip_empty_files_bucket_success";
checkBucketedResult(queryBucketRunner, tableName);
}

@Test
public void testSkipEmptyFilesBucketInsertFileFail()
{
String tableName = "skip_empty_files_bucket_insert_fail";
checkBucketedResultFail(queryBucketFailRunner, tableName, ".* is corrupt.* does not match the standard naming pattern, and the number of files in the directory .* does not match the declared bucket count.*");
}

@Test
public void testSkipEmptyFilesBucketReplaceFileFail()
{
String tableName = "skip_empty_files_bucket_replace_fail";
checkBucketedResultFail(queryBucketFailRunner, tableName, ".* is not a valid Parquet File");
}

/**
* Tries a table with the configuration property desired. If succeeds, tests the output.
* Finally, it drops the table.
*
* @param queryRunner a {@link QueryRunner} with the desired configuration properties
* @param tableName a {@link String} containing the desired table name
*/
private void checkBucketedResult(DistributedQueryRunner queryRunner, String tableName)
{
try {
@Language("SQL") String selectQuery = format("SELECT * FROM %s.\"%s\".\"%s\"", CATALOG,
SCHEMA, tableName);
MaterializedResult result = queryRunner.execute(selectQuery);
SCHEMA, "skip_empty_files_bucket_success");
MaterializedResult result = queryBucketRunner.execute(selectQuery);
assertEquals(5, result.getRowCount());
}
finally {
@Language("SQL") String dropQuery = format("DROP TABLE IF EXISTS %s.\"%s\".\"%s\"", CATALOG,
SCHEMA, tableName);
queryRunner.execute(dropQuery);
dropTable(queryBucketRunner, "skip_empty_files_bucket_success");
}
}

private void checkBucketedResultFail(DistributedQueryRunner queryRunner, String tableName, @Language("RegExp") String errorMessage)
@Test
public void testSkipEmptyFilesBucketInsertFileFail()
{
try {
@Language("SQL") String selectQuery = format("SELECT * FROM %s.\"%s\".\"%s\"", CATALOG,
SCHEMA, tableName);
assertQueryFails(queryRunner, selectQuery, errorMessage);
SCHEMA, "skip_empty_files_bucket_insert_fail");
assertQueryFails(queryBucketFailRunner, selectQuery, ".* is corrupt.* does not match the standard naming pattern," +
" and the number of files in the directory .* does not match the declared bucket count.*");
}
finally {
@Language("SQL") String dropQuery = format("DROP TABLE IF EXISTS %s.\"%s\".\"%s\"", CATALOG,
SCHEMA, tableName);
queryRunner.execute(dropQuery);
dropTable(queryBucketFailRunner, "skip_empty_files_bucket_insert_fail");
}
}

/**
* Tries a table with the configuration property desired. If succeeds, tests the output.
* Finally, it drops the table.
*
* @param queryRunner a {@link QueryRunner} with the desired configuration properties
* @param tableName a {@link String} containing the desired table name
* @param externalLocation a {@link String} with the external location to create the table against it
*/
private void executeCreationTestAndDropCycle(DistributedQueryRunner queryRunner, String tableName, String externalLocation)
@Test
public void testSkipEmptyFilesBucketReplaceFileFail()
{
try {
@Language("SQL") String createQuery = format(
"CREATE TABLE %s.\"%s\".\"%s\" (field %s) WITH (external_location = '%s')",
CATALOG,
SCHEMA,
tableName,
IntegerType.INTEGER,
externalLocation);
queryRunner.execute(createQuery);
@Language("SQL") String selectQuery = format("SELECT * FROM %s.\"%s\".\"%s\"", CATALOG,
SCHEMA, tableName);
MaterializedResult result = queryRunner.execute(selectQuery);
assertEquals(1, result.getRowCount());
SCHEMA, "skip_empty_files_bucket_replace_fail");
assertQueryFails(queryBucketFailRunner, selectQuery, ".* is not a valid Parquet File");
}
finally {
@Language("SQL") String dropQuery = format("DROP TABLE IF EXISTS %s.\"%s\".\"%s\"", CATALOG,
SCHEMA, tableName);
queryRunner.execute(dropQuery);
dropTable(queryBucketFailRunner, "skip_empty_files_bucket_replace_fail");
}
}

private void executeCreationTestAndDropCycleFail(DistributedQueryRunner queryRunner, String tableName, String externalLocation)
private void dropTable(DistributedQueryRunner queryRunner, String tableName)
{
try {
@Language("RegExp") String errorMessage = ".* is not a valid Parquet File";
@Language("SQL") String createQuery = format(
"CREATE TABLE %s.\"%s\".\"%s\" (field %s) WITH (external_location = '%s')",
CATALOG,
SCHEMA,
tableName,
IntegerType.INTEGER,
externalLocation);
queryRunner.execute(createQuery);
@Language("SQL") String selectQuery = format("SELECT * FROM %s.\"%s\".\"%s\"", CATALOG,
SCHEMA, tableName);
assertQueryFails(queryRunner, selectQuery, errorMessage);
}
finally {
@Language("SQL") String dropQuery = format("DROP TABLE IF EXISTS %s.\"%s\".\"%s\"", CATALOG,
SCHEMA, tableName);
queryRunner.execute(dropQuery);
}
@Language("SQL") String dropQuery = format("DROP TABLE IF EXISTS %s.\"%s\".\"%s\"", CATALOG,
SCHEMA, tableName);
queryRunner.execute(dropQuery);
}
}

0 comments on commit 6ed1e6f

Please sign in to comment.