Skip to content

Commit

Permalink
Change C++ singlejar to use stdio for all output, instead of a mix of
Browse files Browse the repository at this point in the history
sendfile() and write() without buffering.  sendfile() is not a speedup
for typical jar contents; the average compressed class file is less than
2KB, so memcpy()ing the data into a large buffer is cheaper than the
extra system calls.

The existing non-sendfile code was making four calls to lseek() per
archive member.  I removed all of those by using pread() or caching the
output position.

Configure stdio to use a 128KB buffer to make fewer write() calls to the
output file.  This is a noticeable speedup over the default when writing
to a fuse filesystem.  (I think this wouldn't be necessary if our fuse
filesystems would set st_blksize appropriately in stat() results.)

Remove needless thread-hostile calls to umask().

--
MOS_MIGRATED_REVID=133057985
  • Loading branch information
Googler authored and dslomov committed Sep 14, 2016
1 parent f92ecb3 commit b4cf5e3
Show file tree
Hide file tree
Showing 2 changed files with 65 additions and 91 deletions.
145 changes: 58 additions & 87 deletions src/tools/singlejar/output_jar.cc
Original file line number Diff line number Diff line change
Expand Up @@ -21,9 +21,6 @@
#include <errno.h>
#include <stdio.h>
#include <stdlib.h>
#if defined(__linux)
#include <sys/sendfile.h>
#endif
#include <sys/stat.h>
#include <time.h>
#include <unistd.h>
Expand All @@ -45,7 +42,9 @@

OutputJar::OutputJar()
: options_(nullptr),
fd_(-1),
file_(nullptr),
outpos_(0),
buffer_(nullptr),
entries_(0),
duplicate_entries_(0),
cen_(nullptr),
Expand Down Expand Up @@ -105,10 +104,13 @@ int OutputJar::Doit(Options *options) {
const char *const launcher_path = options_->java_launcher.c_str();
int in_fd = open(launcher_path, O_RDONLY);
struct stat statbuf;
if (fd_ < 0 || fstat(in_fd, &statbuf)) {
if (file_ == nullptr || fstat(in_fd, &statbuf)) {
diag_err(1, "%s", launcher_path);
}
ssize_t byte_count = AppendFile(in_fd, nullptr, statbuf.st_size);
// TODO(asmundak): Consider going back to sendfile() or reflink
// (BTRFS_IOC_CLONE/XFS_IOC_CLONE) here. The launcher preamble can
// be very large for targets with many native deps.
ssize_t byte_count = AppendFile(in_fd, 0, statbuf.st_size);
if (byte_count < 0) {
diag_err(1, "%s:%d: Cannot copy %s to %s", __FILE__, __LINE__,
launcher_path, options_->output_jar.c_str());
Expand Down Expand Up @@ -214,24 +216,34 @@ int OutputJar::Doit(Options *options) {
}

OutputJar::~OutputJar() {
if (fd_ >= 0) {
if (file_) {
diag_warnx("%s:%d: Close() should be called first", __FILE__, __LINE__);
}
}

// Try to perform I/O in units of this size.
// (128KB is the default max request size for fuse filesystems.)
static const size_t kBufferSize = 128<<10;

bool OutputJar::Open() {
if (fd_ >= 0) {
if (file_) {
diag_errx(1, "%s:%d: Cannot open output archive twice", __FILE__, __LINE__);
}
// The output file has read/write/execute permissions for the owner,
// default for the rest.
mode_t old_umask = umask(0);
fd_ = creat(path(), (S_IRWXU | S_IRWXG | S_IRWXO) & ~old_umask);
umask(old_umask);
if (fd_ < 0) {
// Set execute bits since we may produce an executable output file.
int fd = open(path(), O_CREAT|O_WRONLY|O_TRUNC, 0777);
if (fd < 0) {
diag_warn("%s:%d: %s", __FILE__, __LINE__, path());
return false;
}
file_ = fdopen(fd, "w");
if (file_ == nullptr) {
diag_warn("%s:%d: fdopen of %s", __FILE__, __LINE__, path());
close(fd);
return false;
}
outpos_ = 0;
buffer_.reset(new char[kBufferSize]);
setbuffer(file_, buffer_.get(), kBufferSize);
if (options_->verbose) {
fprintf(stderr, "Writing to %s\n", path());
}
Expand Down Expand Up @@ -414,7 +426,7 @@ bool OutputJar::AddJar(int jar_path_index) {
lh_new->last_mod_file_date(33);
lh_new->last_mod_file_time(normalized_time);
// Now write these few bytes and adjust read/write positions accordingly.
if (!WriteBytes(reinterpret_cast<uint8_t *>(lh_new), lh_new->size())) {
if (!WriteBytes(lh_new, lh_new->size())) {
diag_err(1, "%s:%d: Cannot copy modified local header for %.*s",
__FILE__, __LINE__, file_name_length, file_name);
}
Expand All @@ -425,9 +437,8 @@ bool OutputJar::AddJar(int jar_path_index) {
}
}

// Do the actual copy. Use sendfile, avoiding copying the data to user
// space and back.
ssize_t n_copied = AppendFile(input_jar.fd(), &copy_from, num_bytes);
// Do the actual copy.
ssize_t n_copied = AppendFile(input_jar.fd(), copy_from, num_bytes);
if (n_copied < 0) {
diag_err(1, "%s:%d: Cannot copy %ld bytes of %.*s from %s", __FILE__,
__LINE__, num_bytes, file_name_length, file_name,
Expand Down Expand Up @@ -474,12 +485,14 @@ bool OutputJar::AddJar(int jar_path_index) {
}

off_t OutputJar::Position() {
off_t position = lseek(fd_, 0, SEEK_CUR);
if (position == (off_t)-1) {
diag_err(1, "%s:%d: lseek", __FILE__, __LINE__);
}
TODO(position < 0xFFFFFFFF, "Handle Zip64");
return position;
if (file_ == nullptr) {
diag_err(1, "%s:%d: output file is not open", __FILE__, __LINE__);
}
// You'd think this could be "return ftell(file_);", but that
// generates a needless call to lseek. So instead we cache our
// current position in the output.
TODO(outpos_ < 0xFFFFFFFF, "Handle Zip64");
return outpos_;
}

// Writes an entry. The argument is the pointer to the contiguos block of
Expand Down Expand Up @@ -600,7 +613,7 @@ uint8_t *OutputJar::ReserveCdh(size_t size) {

// Write out combined jar.
bool OutputJar::Close() {
if (fd_ < 0) {
if (file_ == nullptr) {
return true;
}

Expand All @@ -614,10 +627,7 @@ bool OutputJar::Close() {
WriteEntry(spring_schemas_.OutputEntry(options_->force_compression));
WriteEntry(protobuf_meta_handler_.OutputEntry(options_->force_compression));
// TODO(asmundak): handle manifest;
off_t output_position = lseek(fd_, 0, SEEK_CUR);
if (output_position == (off_t)-1) {
diag_err(1, "%s:%d: lseek", __FILE__, __LINE__);
}
off_t output_position = Position();
bool write_zip64_ecd = output_position >= 0xFFFFFFFF || entries_ >= 0xFFFF ||
cen_size_ >= 0xFFFFFFFF;

Expand Down Expand Up @@ -667,13 +677,14 @@ bool OutputJar::Close() {
}
free(cen_);

if (close(fd_)) {
if (fclose(file_)) {
diag_err(1, "%s:%d: %s", __FILE__, __LINE__, path());
fd_ = -1;
return false;
}
file_ = nullptr;
// Free the buffer only after fclose(); stdio may flush data from the
// buffer on close.
buffer_.reset();

fd_ = -1;
if (options_->verbose) {
fprintf(stderr, "Wrote %s with %d entries", path(), entries_);
if (duplicate_entries_) {
Expand Down Expand Up @@ -709,84 +720,44 @@ void OutputJar::ClasspathResource(const std::string &resource_name,
known_members_.emplace(resource_name, EntryInfo{classpath_resource});
}

#if defined(__APPLE__)
ssize_t OutputJar::AppendFile(int in_fd, off_t *in_offset, size_t count) {
if (!count) {
ssize_t OutputJar::AppendFile(int in_fd, off_t offset, size_t count) {
if (count == 0) {
return 0;
}
uint8_t buffer[8192];
std::unique_ptr<void, decltype(free)*> buffer(malloc(kBufferSize), free);
if (buffer == nullptr) {
diag_err(1, "%s:%d: malloc", __FILE__, __LINE__);
}
ssize_t total_written = 0;

// If the input file position (the offset in the input file) has been passed,
// that's where we start, and the input file position has to be restored after
// we are done copying.
const off_t offset_error = static_cast<off_t>(-1);
off_t old_input_offset = offset_error;
if (in_offset) {
if (offset_error == (old_input_offset = lseek(in_fd, 0, SEEK_CUR)) ||
offset_error == lseek(in_fd, *in_offset, SEEK_SET)) {
return -1;
}
}
while (total_written < count) {
ssize_t n_read =
read(in_fd, buffer, std::min(sizeof(buffer), count - total_written));
size_t len = std::min(kBufferSize, count - total_written);
ssize_t n_read = pread(in_fd, buffer.get(), len, offset + total_written);
if (n_read > 0) {
if (!WriteBytes(buffer, n_read)) {
if (!WriteBytes(buffer.get(), n_read)) {
return -1;
}
total_written += n_read;
} else if (n_read == 0) {
break;
} else if (EAGAIN != errno) {
} else {
return -1;
}
}

// If the input file position has been passed, update it and restore
// the read position in the input file.
if (in_offset) {
if (offset_error == lseek(in_fd, old_input_offset, SEEK_SET)) {
return -1;
}
*in_offset += total_written;
}
return total_written;
}

#elif defined(__linux)
ssize_t OutputJar::AppendFile(int in_fd, off_t *in_offset, size_t count) {
// sendfile call is interruptable and has to be handled the same way as write
// call.
for (size_t to_write = count; to_write > 0;) {
ssize_t written = sendfile(fd_, in_fd, in_offset, to_write);
if (written < 0) {
return written;
} else if (written == 0) {
return static_cast<ssize_t>(count - to_write);
}
to_write -= static_cast<size_t>(written);
}
return static_cast<ssize_t>(count);
}
#endif

void OutputJar::ExtraCombiner(const std::string &entry_name,
Combiner *combiner) {
extra_combiners_.emplace_back(combiner);
known_members_.emplace(entry_name, EntryInfo{combiner});
}

bool OutputJar::WriteBytes(uint8_t *buffer, size_t count) {
for (uint8_t *buffer_end = buffer + count; buffer < buffer_end;) {
ssize_t n_written = write(fd_, buffer, buffer_end - buffer);
if (n_written > 0) {
buffer += n_written;
} else if (EAGAIN == errno) {
return false;
}
}
return true;
bool OutputJar::WriteBytes(void *buffer, size_t count) {
size_t written = fwrite(buffer, 1, count, file_);
outpos_ += written;
return written == count;
}

void OutputJar::ExtraHandler(const CDH *) {}
11 changes: 7 additions & 4 deletions src/tools/singlejar/output_jar.h
Original file line number Diff line number Diff line change
Expand Up @@ -16,6 +16,7 @@
#define SRC_TOOLS_SINGLEJAR_COMBINED_JAR_H_

#include <stdint.h>
#include <stdio.h>
#include <memory>
#include <string>
#include <unordered_map>
Expand Down Expand Up @@ -83,10 +84,10 @@ class OutputJar {
// Set classpath resource with given resource name and path.
void ClasspathResource(const std::string& resource_name,
const std::string& resource_path);
// Copy the bytes from the given file.
ssize_t AppendFile(int in_fd, off_t *in_offset, size_t count);
// Copy 'count' bytes starting at 'offset' from the given file.
ssize_t AppendFile(int in_fd, off_t offset, size_t count);
// Write bytes to the output file, return true on success.
bool WriteBytes(uint8_t *buffer, size_t count);
bool WriteBytes(void *buffer, size_t count);


Options *options_;
Expand All @@ -98,7 +99,9 @@ class OutputJar {
};

std::unordered_map<std::string, struct EntryInfo> known_members_;
int fd_;
FILE *file_;
off_t outpos_;
std::unique_ptr<char[]> buffer_;
int entries_;
int duplicate_entries_;
uint8_t *cen_;
Expand Down

0 comments on commit b4cf5e3

Please sign in to comment.