Skip to content

Commit

Permalink
feat: add getConsumedFileSize method to calculate physical space (in…
Browse files Browse the repository at this point in the history
…clude ErasureCoding)
  • Loading branch information
Малыхин Максим Владиславович committed Feb 3, 2025
1 parent 01f7fe1 commit 38ead32
Show file tree
Hide file tree
Showing 3 changed files with 72 additions and 42 deletions.
25 changes: 25 additions & 0 deletions lib/src/main/java/de/m3y/hadoop/hdfs/hfsa/util/FsUtil.java
Original file line number Diff line number Diff line change
Expand Up @@ -2,6 +2,8 @@

import org.apache.hadoop.fs.permission.FsPermission;
import org.apache.hadoop.hdfs.protocol.BlockStoragePolicy;
import org.apache.hadoop.hdfs.protocol.ErasureCodingPolicy;
import org.apache.hadoop.hdfs.protocol.SystemErasureCodingPolicies;
import org.apache.hadoop.hdfs.protocol.proto.HdfsProtos;
import org.apache.hadoop.hdfs.server.blockmanagement.BlockStoragePolicySuite;
import org.apache.hadoop.hdfs.server.namenode.FsImageProto;
Expand Down Expand Up @@ -63,6 +65,7 @@ public static int getFileReplication(FsImageProto.INodeSection.INodeFile file) {

/**
* Formats the permission as octal.
*
* @param permission the permission.
* @return the formatted octal value.
*/
Expand Down Expand Up @@ -97,4 +100,26 @@ public static long getFileSize(FsImageProto.INodeSection.INodeFile file) {
}
return size;
}

/**
 * Computes the consumed (physical) file size for all blocks.
 * <p>
 * For files without an erasure coding policy this is the logical file size multiplied by
 * the replication factor. For erasure coded (striped) files this is, per block group, the
 * data bytes plus the parity bytes of that group.
 *
 * @param file the file.
 * @return the consumed size in bytes.
 * @throws IllegalArgumentException if the file references an erasure coding policy ID that
 *                                  cannot be resolved to a system policy.
 */
public static long getConsumedFileSize(FsImageProto.INodeSection.INodeFile file) {
    if (!file.hasErasureCodingPolicyID()) {
        return getFileSize(file) * file.getReplication();
    }

    final int policyId = file.getErasureCodingPolicyID();
    final ErasureCodingPolicy ecp = SystemErasureCodingPolicies.getByID((byte) policyId);
    if (ecp == null) {
        // Only system policies can be resolved here; fail with a clear message instead of
        // a NullPointerException further down.
        throw new IllegalArgumentException(
                "Unsupported erasure coding policy ID " + policyId);
    }

    final int cellSize = ecp.getCellSize();
    final int numDataUnits = ecp.getNumDataUnits();
    final int numParityUnits = ecp.getNumParityUnits();

    long size = 0;
    for (HdfsProtos.BlockProto p : file.getBlocksList()) {
        size += getStripedBlockGroupConsumedSize(p.getNumBytes(), numDataUnits, numParityUnits, cellSize);
    }
    return size;
}

/**
 * Computes the consumed size of a single striped block group: data bytes plus parity bytes.
 * <p>
 * Every parity internal block is as long as the first data internal block of the group.
 * That is one full cell per completely filled stripe, plus, for a partially filled last
 * stripe, only the bytes stored in its first cell (at most one cell).
 *
 * @param numBytes       the number of data bytes in the block group.
 * @param numDataUnits   the number of data units of the policy.
 * @param numParityUnits the number of parity units of the policy.
 * @param cellSize       the cell size of the policy in bytes.
 * @return the consumed size of the block group in bytes.
 */
private static long getStripedBlockGroupConsumedSize(long numBytes, int numDataUnits,
                                                     int numParityUnits, int cellSize) {
    // Data bytes held by one complete stripe (parity cells not counted).
    final long stripeSize = (long) cellSize * numDataUnits;
    final long fullStripes = numBytes / stripeSize;
    final long lastStripeDataLength = numBytes % stripeSize;
    // A partial last stripe fills its first cell before any other, so the first cell holds
    // min(remaining bytes, cellSize) bytes; this is zero when the group ends on a stripe boundary.
    final long parityBlockLength = fullStripes * cellSize + Math.min(lastStripeDataLength, cellSize);
    return numBytes + parityBlockLength * numParityUnits;
}
}
Original file line number Diff line number Diff line change
Expand Up @@ -32,6 +32,7 @@ abstract static class AbstractStats {
final LongAdder sumSymLinks = new LongAdder();
long sumBlocks;
long sumFileSize;
long sumConsumedFileSize;
final SizeBucket fileSizeBuckets;

static final Comparator<AbstractStats> COMPARATOR_BLOCKS = Comparator.comparingLong(o -> o.sumBlocks);
Expand Down Expand Up @@ -167,16 +168,16 @@ void doSummary(Report report) {
final String bucketHeader = String.format(bucketFormatHeader, (Object[]) bucketUnits);

out.println(
"#Groups | #Users | #Directories | #Symlinks | #Files | Size [MB] | #Blocks | File Size Buckets ");
"#Groups | #Users | #Directories | #Symlinks | #Files | Size [MB] | CSize[MB] | #Blocks | File Size Buckets ");
String header2ndLine =
" | | | | | | | " + bucketHeader;
" | | | | | | | | " + bucketHeader;
out.println(header2ndLine);
out.println(FormatUtil.padRight('-', header2ndLine.length()));

out.printf("%8d | %11d | %12d | %9d | %10d | %9d | %9d | %s%n",
out.printf("%8d | %11d | %12d | %9d | %10d | %9d | %9d | %9d | %s%n",
report.groupStats.size(), report.userStats.size(),
overallStats.sumDirectories.longValue(), overallStats.sumSymLinks.longValue(),
overallStats.sumFiles, overallStats.sumFileSize / 1024L / 1024L,
overallStats.sumFiles, overallStats.sumFileSize / 1024L / 1024L, overallStats.sumConsumedFileSize / 1024L / 1024L,
overallStats.sumBlocks,
String.format(bucketFormatValue,
FormatUtil.boxAndPadWithZeros(maxLength.length, overallStats.fileSizeBuckets.get()))
Expand All @@ -185,16 +186,16 @@ void doSummary(Report report) {

// Groups
out.printf(
"By group: %8d | #Directories | #SymLinks | #File | Size [MB] | #Blocks | File Size Buckets%n",
"By group: %8d | #Directories | #SymLinks | #File | Size [MB] | CSize[MB] | #Blocks | File Size Buckets%n",
report.groupStats.size());
header2ndLine = " " +
" | | | | | | " + bucketHeader;
" | | | | | | | " + bucketHeader;
out.println(header2ndLine);
out.println(FormatUtil.padRight('-', header2ndLine.length()));
for (GroupStats stat : sortStats(report.groupStats.values(), sort.getComparator())) {
out.printf("%22s | %10d | %9d | %10d | %9d | %9d | %s%n",
out.printf("%22s | %10d | %9d | %10d | %9d | %9d | %9d | %s%n",
stat.groupName, stat.sumDirectories.longValue(), stat.sumSymLinks.longValue(),
stat.sumFiles, stat.sumFileSize / 1024L / 1024L,
stat.sumFiles, stat.sumFileSize / 1024L / 1024L, stat.sumConsumedFileSize / 1024L / 1024L,
stat.sumBlocks,
String.format(bucketFormatValue,
FormatUtil.boxAndPadWithZeros(maxLength.length, stat.fileSizeBuckets.get()))
Expand All @@ -205,16 +206,16 @@ void doSummary(Report report) {
out.println();
final List<UserStats> userStats = filterByUserName(report.userStats.values(), mainCommand.userNameFilter);
out.printf(
"By user: %8d | #Directories | #SymLinks | #File | Size [MB] | #Blocks | File Size Buckets%n",
"By user: %8d | #Directories | #SymLinks | #File | Size [MB] | CSize[MB] | #Blocks | File Size Buckets%n",
userStats.size());
header2ndLine = " " +
" | | | | | | " + bucketHeader;
" | | | | | | | " + bucketHeader;
out.println(header2ndLine);
out.println(FormatUtil.padRight('-', header2ndLine.length()));
for (UserStats stat : sortStats(userStats, sort.getComparator())) {
out.printf("%22s | %10d | %9d | %10d | %9d | %9d | %s%n",
out.printf("%22s | %10d | %9d | %10d | %9d | %9d | %9d | %s%n",
stat.userName, stat.sumDirectories.longValue(), stat.sumSymLinks.longValue(),
stat.sumFiles, stat.sumFileSize / 1024L / 1024L,
stat.sumFiles, stat.sumFileSize / 1024L / 1024L, stat.sumConsumedFileSize / 1024L / 1024L,
stat.sumBlocks,
String.format(bucketFormatValue,
FormatUtil.boxAndPadWithZeros(maxLength.length, stat.fileSizeBuckets.get()))
Expand Down Expand Up @@ -244,11 +245,13 @@ public void onFile(FsImageProto.INodeSection.INode inode, String path) {
PermissionStatus p = fsImageData.getPermissionStatus(f.getPermission());

final long fileSize = FsUtil.getFileSize(f);
final long consumedSize = FsUtil.getConsumedFileSize(f);
final long fileBlocks = f.getBlocksCount();
synchronized (overallStats) {
overallStats.fileSizeBuckets.add(fileSize);
overallStats.sumBlocks += fileBlocks;
overallStats.sumFileSize += fileSize;
overallStats.sumConsumedFileSize += consumedSize;
overallStats.sumFiles++;
}

Expand All @@ -258,6 +261,7 @@ public void onFile(FsImageProto.INodeSection.INode inode, String path) {
synchronized (groupStat) {
groupStat.sumFiles++;
groupStat.sumFileSize += fileSize;
groupStat.sumConsumedFileSize += consumedSize;
groupStat.fileSizeBuckets.add(fileSize);
groupStat.sumBlocks += fileBlocks;
}
Expand All @@ -268,6 +272,7 @@ public void onFile(FsImageProto.INodeSection.INode inode, String path) {
synchronized (userStat) {
userStat.sumFiles++;
userStat.sumFileSize += fileSize;
userStat.sumConsumedFileSize += consumedSize;
userStat.fileSizeBuckets.add(fileSize);
userStat.sumBlocks += fileBlocks;
}
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -30,24 +30,24 @@ public void testRun() {
"HDFS Summary : /\n" +
"----------------\n" +
"\n" +
"#Groups | #Users | #Directories | #Symlinks | #Files | Size [MB] | #Blocks | File Size Buckets \n" +
" | | | | | | | 0 B 1 MiB 2 MiB 4 MiB 8 MiB 16 MiB 32 MiB 64 MiB 128 MiB 256 MiB\n" +
"----------------------------------------------------------------------------------------------------------------------------------------------------------\n" +
" 3 | 3 | 8 | 0 | 11 | 331 | 12 | 0 2 1 2 1 0 2 1 1 1\n" +
"#Groups | #Users | #Directories | #Symlinks | #Files | Size [MB] | CSize[MB] | #Blocks | File Size Buckets \n" +
" | | | | | | | | 0 B 1 MiB 2 MiB 4 MiB 8 MiB 16 MiB 32 MiB 64 MiB 128 MiB 256 MiB\n" +
"----------------------------------------------------------------------------------------------------------------------------------------------------------------------\n" +
" 3 | 3 | 8 | 0 | 11 | 331 | 331 | 12 | 0 2 1 2 1 0 2 1 1 1\n" +
"\n" +
"By group: 3 | #Directories | #SymLinks | #File | Size [MB] | #Blocks | File Size Buckets\n" +
" | | | | | | 0 B 1 MiB 2 MiB 4 MiB 8 MiB 16 MiB 32 MiB 64 MiB 128 MiB 256 MiB\n" +
"---------------------------------------------------------------------------------------------------------------------------------------------------------\n" +
" root | 0 | 0 | 1 | 0 | 1 | 0 1 0 0 0 0 0 0 0 0\n" +
" supergroup | 8 | 0 | 8 | 151 | 8 | 0 1 1 2 1 0 1 1 1 0\n" +
" nobody | 0 | 0 | 2 | 180 | 3 | 0 0 0 0 0 0 1 0 0 1\n" +
"By group: 3 | #Directories | #SymLinks | #File | Size [MB] | CSize[MB] | #Blocks | File Size Buckets\n" +
" | | | | | | | 0 B 1 MiB 2 MiB 4 MiB 8 MiB 16 MiB 32 MiB 64 MiB 128 MiB 256 MiB\n" +
"---------------------------------------------------------------------------------------------------------------------------------------------------------------------\n" +
" root | 0 | 0 | 1 | 0 | 0 | 1 | 0 1 0 0 0 0 0 0 0 0\n" +
" supergroup | 8 | 0 | 8 | 151 | 151 | 8 | 0 1 1 2 1 0 1 1 1 0\n" +
" nobody | 0 | 0 | 2 | 180 | 180 | 3 | 0 0 0 0 0 0 1 0 0 1\n" +
"\n" +
"By user: 3 | #Directories | #SymLinks | #File | Size [MB] | #Blocks | File Size Buckets\n" +
" | | | | | | 0 B 1 MiB 2 MiB 4 MiB 8 MiB 16 MiB 32 MiB 64 MiB 128 MiB 256 MiB\n" +
"---------------------------------------------------------------------------------------------------------------------------------------------------------\n" +
" root | 0 | 0 | 1 | 0 | 1 | 0 1 0 0 0 0 0 0 0 0\n" +
" foo | 0 | 0 | 1 | 160 | 2 | 0 0 0 0 0 0 0 0 0 1\n" +
" mm | 8 | 0 | 9 | 171 | 9 | 0 1 1 2 1 0 2 1 1 0\n"
"By user: 3 | #Directories | #SymLinks | #File | Size [MB] | CSize[MB] | #Blocks | File Size Buckets\n" +
" | | | | | | | 0 B 1 MiB 2 MiB 4 MiB 8 MiB 16 MiB 32 MiB 64 MiB 128 MiB 256 MiB\n" +
"---------------------------------------------------------------------------------------------------------------------------------------------------------------------\n" +
" root | 0 | 0 | 1 | 0 | 0 | 1 | 0 1 0 0 0 0 0 0 0 0\n" +
" foo | 0 | 0 | 1 | 160 | 160 | 2 | 0 0 0 0 0 0 0 0 0 1\n" +
" mm | 8 | 0 | 9 | 171 | 171 | 9 | 0 1 1 2 1 0 2 1 1 0\n"
);
}
}
Expand All @@ -72,22 +72,22 @@ public void testRunWithFilterForUserFoo() {
"HDFS Summary : /\n" +
"----------------\n" +
"\n" +
"#Groups | #Users | #Directories | #Symlinks | #Files | Size [MB] | #Blocks | File Size Buckets \n" +
" | | | | | | | 0 B 1 MiB 2 MiB 4 MiB 8 MiB 16 MiB 32 MiB 64 MiB 128 MiB 256 MiB\n" +
"----------------------------------------------------------------------------------------------------------------------------------------------------------\n" +
" 3 | 3 | 8 | 0 | 11 | 331 | 12 | 0 2 1 2 1 0 2 1 1 1\n" +
"#Groups | #Users | #Directories | #Symlinks | #Files | Size [MB] | CSize[MB] | #Blocks | File Size Buckets \n" +
" | | | | | | | | 0 B 1 MiB 2 MiB 4 MiB 8 MiB 16 MiB 32 MiB 64 MiB 128 MiB 256 MiB\n" +
"----------------------------------------------------------------------------------------------------------------------------------------------------------------------\n" +
" 3 | 3 | 8 | 0 | 11 | 331 | 331 | 12 | 0 2 1 2 1 0 2 1 1 1\n" +
"\n" +
"By group: 3 | #Directories | #SymLinks | #File | Size [MB] | #Blocks | File Size Buckets\n" +
" | | | | | | 0 B 1 MiB 2 MiB 4 MiB 8 MiB 16 MiB 32 MiB 64 MiB 128 MiB 256 MiB\n" +
"---------------------------------------------------------------------------------------------------------------------------------------------------------\n" +
" root | 0 | 0 | 1 | 0 | 1 | 0 1 0 0 0 0 0 0 0 0\n" +
" supergroup | 8 | 0 | 8 | 151 | 8 | 0 1 1 2 1 0 1 1 1 0\n" +
" nobody | 0 | 0 | 2 | 180 | 3 | 0 0 0 0 0 0 1 0 0 1\n" +
"By group: 3 | #Directories | #SymLinks | #File | Size [MB] | CSize[MB] | #Blocks | File Size Buckets\n" +
" | | | | | | | 0 B 1 MiB 2 MiB 4 MiB 8 MiB 16 MiB 32 MiB 64 MiB 128 MiB 256 MiB\n" +
"---------------------------------------------------------------------------------------------------------------------------------------------------------------------\n" +
" root | 0 | 0 | 1 | 0 | 0 | 1 | 0 1 0 0 0 0 0 0 0 0\n" +
" supergroup | 8 | 0 | 8 | 151 | 151 | 8 | 0 1 1 2 1 0 1 1 1 0\n" +
" nobody | 0 | 0 | 2 | 180 | 180 | 3 | 0 0 0 0 0 0 1 0 0 1\n" +
"\n" +
"By user: 1 | #Directories | #SymLinks | #File | Size [MB] | #Blocks | File Size Buckets\n" +
" | | | | | | 0 B 1 MiB 2 MiB 4 MiB 8 MiB 16 MiB 32 MiB 64 MiB 128 MiB 256 MiB\n" +
"---------------------------------------------------------------------------------------------------------------------------------------------------------\n" +
" foo | 0 | 0 | 1 | 160 | 2 | 0 0 0 0 0 0 0 0 0 1\n"
"By user: 1 | #Directories | #SymLinks | #File | Size [MB] | CSize[MB] | #Blocks | File Size Buckets\n" +
" | | | | | | | 0 B 1 MiB 2 MiB 4 MiB 8 MiB 16 MiB 32 MiB 64 MiB 128 MiB 256 MiB\n" +
"---------------------------------------------------------------------------------------------------------------------------------------------------------------------\n" +
" foo | 0 | 0 | 1 | 160 | 160 | 2 | 0 0 0 0 0 0 0 0 0 1\n"
);
}
}
Expand Down

0 comments on commit 38ead32

Please sign in to comment.